init

Zonglin Peng · Zonglin Peng · commit c29492079a12 · 2025-03-03T16:09:33.000-08:00
diff --git a/backends/cadence/reference/operators/quantized_relu_out.cpp b/backends/cadence/reference/operators/quantized_relu_out.cpp
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/cadence/reference/kernels/kernels.h>
+#include <executorch/backends/cadence/reference/operators/operators.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
 namespace impl {
@@ -75,6 +76,59 @@ void quantized_relu_out(
   }
 }
 
+// Per-tensor quantized ReLU: out[i] = quantize(max(in[i] - in_zero_point, 0)).
+template <typename T>
+void quantized_relu_per_tensor_out_(
+    __ET_UNUSED KernelRuntimeContext& ctx,
+    const Tensor& input,
+    const int64_t in_zero_point,
+    const int64_t out_zero_point,
+    const int64_t out_multiplier,
+    const int64_t out_shift,
+    Tensor& output) {
+  const T* __restrict__ in = input.const_data_ptr<T>();
+  T* __restrict__ out = output.mutable_data_ptr<T>();
+  // out_scale = out_multiplier * 2^(out_shift - 31). 2^31 is a double literal
+  // because `1 << 31` does not fit in a 32-bit int.
+  const float out_scale = out_multiplier / 2147483648.0 * pow(2, out_shift);
+  for (size_t i = 0, e = input.numel(); i < e; ++i) {
+    const float temp = in[i] > in_zero_point ? (in[i] - in_zero_point) : 0;
+    out[i] = kernels::quantize<T>(temp, out_scale, out_zero_point);
+  }
+}
+
+// Dtype-dispatching entry point for the per-tensor quantized ReLU kernel.
+void quantized_relu_per_tensor_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& input,
+    const int64_t in_zero_point,
+    const int64_t out_zero_point,
+    const int64_t out_multiplier,
+    const int64_t out_shift,
+    Tensor& output) {
+  // Case body stamped out by ET_FORALL_CADENCE_QUANTIZED_TYPES below: forwards
+  // every argument to the kernel template instantiated for that element type.
+#define RELU_PER_TENSOR_CASE(elem_t, scalar_kind) \
+  case executorch::aten::ScalarType::scalar_kind: \
+    quantized_relu_per_tensor_out_<elem_t>(       \
+        ctx,                                      \
+        input,                                    \
+        in_zero_point,                            \
+        out_zero_point,                           \
+        out_multiplier,                           \
+        out_shift,                                \
+        output);                                  \
+    break;
+  const executorch::aten::ScalarType in_dtype = input.scalar_type();
+  switch (in_dtype) {
+    ET_FORALL_CADENCE_QUANTIZED_TYPES(RELU_PER_TENSOR_CASE)
+    default:
+      ET_DCHECK_MSG(
+          false, "Unhandled dtype %s", torch::executor::toString(in_dtype));
+  }
+#undef RELU_PER_TENSOR_CASE
+}
+
 }; // namespace native
 }; // namespace reference
 }; // namespace impl