Implement dpnp.compress and dpnp_array.compress method (#2177)

ndgrigorian · vtavana · commit 4f4a446cc39c · 2024-12-02T13:42:18.000-08:00
* Implement dpnp.compress
* Add `dpnp_array.compress` method
* Break up `compress` to satisfy pylint
  Also disable checks for protected access, as `compress` uses dpctl.tensor private functions
* Unmute third-party tests for `compress`
* Use `get_usm_allocations` in `compress`
* Fix bug where `out` in `compress` is dpnp_array
  Also removes an unnecessary check per PR review
* Apply comments per PR review by @antonwolfy
  Also fix a typo when `condition` is not an array
* Remove branching when `condition` is an array
  Also tweaks to docstring
* Add tests for `compress`
* Re-use `_take_index` for `dpnp.take`
  Should slightly improve efficiency by escaping an additional copy where `out` is not `None` and flattening of indices
* Change error for incorrect out array dtype to `TypeError`
* Move compress tests into a TestCompress class
* Use NumPy in compress tests
* Add `no_none=True` to `test_compress_condition_all_dtypes`
* Add USM type and SYCL queue tests for `compress`
* More tests for compress added
* Docstring change per PR review
* Integrate test for compute follows data in compress into test_2in_1out
* Add test for `dpnp_array.compress` and add a test for strided inputs to `compress`
* Refactor `test_compress` in test_usm_type.py into `test_2in_1out`
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
@@ -786,7 +786,14 @@ def clip(self, min=None, max=None, out=None, **kwargs):
 
         return dpnp.clip(self, min, max, out=out, **kwargs)
 
-    # 'compress',
+    def compress(self, condition, axis=None, out=None):
+        """
+        Select slices of an array along a given axis.
+
+        Refer to :obj:`dpnp.compress` for full documentation.
+        """
+
+        return dpnp.compress(condition, self, axis=axis, out=out)
 
     def conj(self):
         """
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
@@ -37,10 +37,16 @@
 
 """
 
+# pylint: disable=protected-access
+
 import operator
 
 import dpctl.tensor as dpt
+import dpctl.tensor._tensor_impl as ti
+import dpctl.utils as dpu
 import numpy
+from dpctl.tensor._copy_utils import _nonzero_impl
+from dpctl.tensor._indexing_functions import _get_indexing_mode
 from dpctl.tensor._numpy_helper import normalize_axis_index
 
 import dpnp
@@ -55,6 +61,7 @@
 
 __all__ = [
     "choose",
+    "compress",
     "diag_indices",
     "diag_indices_from",
     "diagonal",
@@ -155,6 +162,157 @@ def choose(x1, choices, out=None, mode="raise"):
     return call_origin(numpy.choose, x1, choices, out, mode)
 
 
+def _take_index(x, inds, axis, q, usm_type, out=None, mode=0):
+    # arg validation assumed done by caller
+    x_sh = x.shape
+    axis_end = axis + 1
+    if 0 in x_sh[axis:axis_end] and inds.size != 0:
+        raise IndexError("cannot take non-empty indices from an empty axis")
+    res_sh = x_sh[:axis] + inds.shape + x_sh[axis_end:]
+
+    if out is not None:
+        out = dpnp.get_usm_ndarray(out)
+
+        if not out.flags.writable:
+            raise ValueError("provided `out` array is read-only")
+
+        if out.shape != res_sh:
+            raise ValueError(
+                "The shape of input and output arrays are inconsistent. "
+                f"Expected output shape is {res_sh}, got {out.shape}"
+            )
+
+        if x.dtype != out.dtype:
+            raise TypeError(
+                f"Output array of type {x.dtype} is needed, " f"got {out.dtype}"
+            )
+
+        if dpu.get_execution_queue((q, out.sycl_queue)) is None:
+            raise dpu.ExecutionPlacementError(
+                "Input and output allocation queues are not compatible"
+            )
+
+        if ti._array_overlap(x, out):
+            # Allocate a temporary buffer to avoid memory overlapping.
+            out = dpt.empty_like(out)
+    else:
+        out = dpt.empty(res_sh, dtype=x.dtype, usm_type=usm_type, sycl_queue=q)
+
+    _manager = dpu.SequentialOrderManager[q]
+    dep_evs = _manager.submitted_events
+
+    h_ev, take_ev = ti._take(
+        src=x,
+        ind=(inds,),
+        dst=out,
+        axis_start=axis,
+        mode=mode,
+        sycl_queue=q,
+        depends=dep_evs,
+    )
+    _manager.add_event_pair(h_ev, take_ev)
+
+    return out
+
+
+def compress(condition, a, axis=None, out=None):
+    """
+    Return selected slices of an array along given axis.
+
+    A slice of `a` is returned for each index along `axis` where `condition`
+    is ``True``.
+
+    For full documentation refer to :obj:`numpy.compress`.
+
+    Parameters
+    ----------
+    condition : {array_like, dpnp.ndarray, usm_ndarray}
+        Array that selects which entries to extract. If the length of
+        `condition` is less than the size of `a` along `axis`, then
+        the output is truncated to the length of `condition`.
+    a : {dpnp.ndarray, usm_ndarray}
+        Array to extract from.
+    axis : {None, int}, optional
+        Axis along which to extract slices. If ``None``, works over the
+        flattened array.
+        Default: ``None``.
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
+        If provided, the result will be placed in this array. It should
+        be of the appropriate shape and dtype.
+        Default: ``None``.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        A copy of the slices of `a` where `condition` is ``True``.
+
+    See also
+    --------
+    :obj:`dpnp.take` :  Take elements from an array along an axis.
+    :obj:`dpnp.choose` : Construct an array from an index array and a set of
+                         arrays to choose from.
+    :obj:`dpnp.diag` : Extract a diagonal or construct a diagonal array.
+    :obj:`dpnp.diagonal` : Return specified diagonals.
+    :obj:`dpnp.select` : Return an array drawn from elements in `choicelist`,
+                         depending on conditions.
+    :obj:`dpnp.ndarray.compress` : Equivalent method.
+    :obj:`dpnp.extract` : Equivalent function when working on 1-D arrays.
+
+    Examples
+    --------
+    >>> import dpnp as np
+    >>> a = np.array([[1, 2], [3, 4], [5, 6]])
+    >>> a
+    array([[1, 2],
+           [3, 4],
+           [5, 6]])
+    >>> np.compress([0, 1], a, axis=0)
+    array([[3, 4]])
+    >>> np.compress([False, True, True], a, axis=0)
+    array([[3, 4],
+           [5, 6]])
+    >>> np.compress([False, True], a, axis=1)
+    array([[2],
+           [4],
+           [6]])
+
+    Working on the flattened array does not return slices along an axis but
+    selects elements.
+
+    >>> np.compress([False, True], a)
+    array([2])
+    """
+
+    dpnp.check_supported_arrays_type(a)
+    if axis is None:
+        if a.ndim != 1:
+            a = dpnp.ravel(a)
+        axis = 0
+    axis = normalize_axis_index(operator.index(axis), a.ndim)
+
+    a_ary = dpnp.get_usm_ndarray(a)
+    cond_ary = dpnp.as_usm_ndarray(
+        condition,
+        dtype=dpnp.bool,
+        usm_type=a_ary.usm_type,
+        sycl_queue=a_ary.sycl_queue,
+    )
+
+    if not cond_ary.ndim == 1:
+        raise ValueError(
+            "`condition` must be a 1-D array or un-nested sequence"
+        )
+
+    res_usm_type, exec_q = get_usm_allocations([a_ary, cond_ary])
+
+    # _nonzero_impl synchronizes and returns a tuple of usm_ndarray indices
+    inds = _nonzero_impl(cond_ary)
+
+    res = _take_index(a_ary, inds[0], axis, exec_q, res_usm_type, out=out)
+
+    return dpnp.get_result_array(res, out=out)
+
+
 def diag_indices(n, ndim=2, device=None, usm_type="device", sycl_queue=None):
     """
     Return the indices to access the main diagonal of an array.
@@ -1806,8 +1964,8 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"):
 
     """
 
-    if mode not in ("wrap", "clip"):
-        raise ValueError(f"`mode` must be 'wrap' or 'clip', but got `{mode}`.")
+    # sets mode to 0 for "wrap" and 1 for "clip", raises otherwise
+    mode = _get_indexing_mode(mode)
 
     usm_a = dpnp.get_usm_ndarray(a)
     if not dpnp.is_supported_array_type(indices):
@@ -1817,34 +1975,28 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"):
     else:
         usm_ind = dpnp.get_usm_ndarray(indices)
 
+    res_usm_type, exec_q = get_usm_allocations([usm_a, usm_ind])
+
     a_ndim = a.ndim
     if axis is None:
-        res_shape = usm_ind.shape
-
         if a_ndim > 1:
-            # dpt.take requires flattened input array
+            # flatten input array
             usm_a = dpt.reshape(usm_a, -1)
+        axis = 0
     elif a_ndim == 0:
         axis = normalize_axis_index(operator.index(axis), 1)
-        res_shape = usm_ind.shape
     else:
         axis = normalize_axis_index(operator.index(axis), a_ndim)
-        a_sh = a.shape
-        res_shape = a_sh[:axis] + usm_ind.shape + a_sh[axis + 1 :]
-
-    if usm_ind.ndim != 1:
-        # dpt.take supports only 1-D array of indices
-        usm_ind = dpt.reshape(usm_ind, -1)
 
     if not dpnp.issubdtype(usm_ind.dtype, dpnp.integer):
         # dpt.take supports only integer dtype for array of indices
         usm_ind = dpt.astype(usm_ind, dpnp.intp, copy=False, casting="safe")
 
-    usm_res = dpt.take(usm_a, usm_ind, axis=axis, mode=mode)
+    usm_res = _take_index(
+        usm_a, usm_ind, axis, exec_q, res_usm_type, out=out, mode=mode
+    )
 
-    # need to reshape the result if shape of indices array was changed
-    result = dpnp.reshape(usm_res, res_shape)
-    return dpnp.get_result_array(result, out)
+    return dpnp.get_result_array(usm_res, out=out)
 
 
 def take_along_axis(a, indices, axis, mode="wrap"):
diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py
@@ -1,9 +1,11 @@
 import functools
 
+import dpctl
 import dpctl.tensor as dpt
 import numpy
 import pytest
 from dpctl.tensor._numpy_helper import AxisError
+from dpctl.utils import ExecutionPlacementError
 from numpy.testing import (
     assert_,
     assert_array_equal,
@@ -1333,3 +1335,101 @@ def test_error(self):
             dpnp.select([x0], [x1], default=x1)
         with pytest.raises(TypeError):
             dpnp.select([x1], [x1])
+
+
+class TestCompress:
+    def test_compress_basic(self):
+        conditions = [True, False, True]
+        a_np = numpy.arange(16).reshape(4, 4)
+        a = dpnp.arange(16).reshape(4, 4)
+        cond_np = numpy.array(conditions)
+        cond = dpnp.array(conditions)
+        expected = numpy.compress(cond_np, a_np, axis=0)
+        result = dpnp.compress(cond, a, axis=0)
+        assert_array_equal(expected, result)
+
+    def test_compress_method_basic(self):
+        conditions = [True, True, False, True]
+        a_np = numpy.arange(3 * 4).reshape(3, 4)
+        a = dpnp.arange(3 * 4).reshape(3, 4)
+        cond_np = numpy.array(conditions)
+        cond = dpnp.array(conditions)
+        expected = a_np.compress(cond_np, axis=1)
+        result = a.compress(cond, axis=1)
+        assert_array_equal(expected, result)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    def test_compress_condition_all_dtypes(self, dtype):
+        a_np = numpy.arange(10, dtype="i4")
+        a = dpnp.arange(10, dtype="i4")
+        cond_np = numpy.tile(numpy.asarray([0, 1], dtype=dtype), 5)
+        cond = dpnp.tile(dpnp.asarray([0, 1], dtype=dtype), 5)
+        expected = numpy.compress(cond_np, a_np)
+        result = dpnp.compress(cond, a)
+        assert_array_equal(expected, result)
+
+    def test_compress_invalid_out_errors(self):
+        q1 = dpctl.SyclQueue()
+        q2 = dpctl.SyclQueue()
+        a = dpnp.ones(10, dtype="i4", sycl_queue=q1)
+        condition = dpnp.asarray([True], sycl_queue=q1)
+        out_bad_shape = dpnp.empty_like(a)
+        with pytest.raises(ValueError):
+            dpnp.compress(condition, a, out=out_bad_shape)
+        out_bad_queue = dpnp.empty(1, dtype="i4", sycl_queue=q2)
+        with pytest.raises(ExecutionPlacementError):
+            dpnp.compress(condition, a, out=out_bad_queue)
+        out_bad_dt = dpnp.empty(1, dtype="i8", sycl_queue=q1)
+        with pytest.raises(TypeError):
+            dpnp.compress(condition, a, out=out_bad_dt)
+        out_read_only = dpnp.empty(1, dtype="i4", sycl_queue=q1)
+        out_read_only.flags.writable = False
+        with pytest.raises(ValueError):
+            dpnp.compress(condition, a, out=out_read_only)
+
+    def test_compress_empty_axis(self):
+        a = dpnp.ones((10, 0, 5), dtype="i4")
+        condition = [True, False, True]
+        r = dpnp.compress(condition, a, axis=0)
+        assert r.shape == (2, 0, 5)
+        # empty take from empty axis is permitted
+        assert dpnp.compress([False], a, axis=1).shape == (10, 0, 5)
+        # non-empty take from empty axis raises IndexError
+        with pytest.raises(IndexError):
+            dpnp.compress(condition, a, axis=1)
+
+    def test_compress_in_overlaps_out(self):
+        conditions = [False, True, True]
+        a_np = numpy.arange(6)
+        a = dpnp.arange(6)
+        cond_np = numpy.array(conditions)
+        cond = dpnp.array(conditions)
+        out = a[2:4]
+        expected = numpy.compress(cond_np, a_np, axis=None)
+        result = dpnp.compress(cond, a, axis=None, out=out)
+        assert_array_equal(expected, result)
+        assert result is out
+        assert (a[2:4] == out).all()
+
+    def test_compress_condition_not_1d(self):
+        a = dpnp.arange(4)
+        cond = dpnp.ones((1, 4), dtype="?")
+        with pytest.raises(ValueError):
+            dpnp.compress(cond, a, axis=None)
+
+    def test_compress_strided(self):
+        a = dpnp.arange(20)
+        a_np = dpnp.asnumpy(a)
+        cond = dpnp.tile(dpnp.array([True, False, False, True]), 5)
+        cond_np = dpnp.asnumpy(cond)
+        result = dpnp.compress(cond, a)
+        expected = numpy.compress(cond_np, a_np)
+        assert_array_equal(result, expected)
+        # 2-D input with default axis=None, so `a` is flattened first
+        a = dpnp.arange(50).reshape(10, 5)
+        a_np = dpnp.asnumpy(a)
+        cond = dpnp.array(dpnp.array([True, False, False, True, False]))
+        cond_np = dpnp.asnumpy(cond)
+        result = dpnp.compress(cond, a)
+        expected = numpy.compress(cond_np, a_np)
+        assert_array_equal(result, expected)
diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
@@ -718,6 +718,7 @@ def test_reduce_hypot(device):
         ),
         pytest.param("append", [1, 2, 3], [4, 5, 6]),
         pytest.param("arctan2", [-1, +1, +1, -1], [-1, -1, +1, +1]),
+        pytest.param("compress", [0, 1, 1, 0], [0, 1, 2, 3]),
         pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
         pytest.param(
             "corrcoef",
diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py
@@ -686,6 +686,7 @@ def test_1in_1out(func, data, usm_type):
         ),
         pytest.param("append", [1, 2, 3], [4, 5, 6]),
         pytest.param("arctan2", [-1, +1, +1, -1], [-1, -1, +1, +1]),
+        pytest.param("compress", [False, True, True], [0, 1, 2, 3, 4]),
         pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
         pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]),
         pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]),
diff --git a/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py b/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py