Skip to content

ENH: implement tm.shares_memory #44747

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,15 @@
UInt64Index,
)
from pandas.core.arrays import (
BaseMaskedArray,
DatetimeArray,
ExtensionArray,
PandasArray,
PeriodArray,
TimedeltaArray,
period_array,
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray

if TYPE_CHECKING:
from pandas import (
Expand Down Expand Up @@ -1050,3 +1053,51 @@ def at(x):

def iat(x):
    """
    Return the ``.iat`` accessor of ``x``.
    """
    accessor = x.iat
    return accessor


# -----------------------------------------------------------------------------


def shares_memory(left, right) -> bool:
    """
    Pandas-compat for np.shares_memory.

    Unwraps pandas containers down to their backing arrays and reports
    whether ``left`` and ``right`` are backed by the same memory.

    Parameters
    ----------
    left, right : np.ndarray, Index, Series, ExtensionArray or DataFrame
        Objects to compare. A DataFrame is only supported when its manager
        holds exactly one array.

    Returns
    -------
    bool

    Raises
    ------
    NotImplementedError
        If the combination of types is not supported, or if a
        "string[pyarrow]" array is not backed by exactly one chunk.
    """
    if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        return np.shares_memory(left, right)
    elif isinstance(left, np.ndarray):
        # Call with reversed args to get to unpacking logic below.
        return shares_memory(right, left)

    if isinstance(left, RangeIndex):
        return False
    if isinstance(left, MultiIndex):
        # ``_codes`` is a container of per-level code arrays, not an array
        # itself; passing the container on would match no branch below and
        # always raise, so check each level's codes individually.
        return any(shares_memory(codes, right) for codes in left._codes)
    if isinstance(left, (Index, Series)):
        return shares_memory(left._values, right)

    if isinstance(left, NDArrayBackedExtensionArray):
        return shares_memory(left._ndarray, right)
    # Use the supported ``pd.arrays`` location; the top-level
    # ``pd.SparseArray`` alias is deprecated/removed.
    if isinstance(left, pd.arrays.SparseArray):
        return shares_memory(left.sp_values, right)

    if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]":
        # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
        if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]":
            left_pa_data = left._data
            right_pa_data = right._data
            # Per review on this PR: comparing the buffers of chunk 0 is only
            # correct when both sides consist of a single chunk, which is not
            # guaranteed in general. Fail loudly instead of silently ignoring
            # the remaining chunks (or indexing into an empty chunk list).
            if left_pa_data.num_chunks != 1 or right_pa_data.num_chunks != 1:
                raise NotImplementedError(
                    "shares_memory only supports single-chunk "
                    "string[pyarrow] arrays"
                )
            left_buf1 = left_pa_data.chunk(0).buffers()[1]
            right_buf1 = right_pa_data.chunk(0).buffers()[1]
            return left_buf1 == right_buf1

    if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
        # By convention, we'll say these share memory if they share *either*
        # the _data or the _mask
        return np.shares_memory(left._data, right._data) or np.shares_memory(
            left._mask, right._mask
        )

    if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
        arr = left._mgr.arrays[0]
        return shares_memory(arr, right)

    raise NotImplementedError(type(left), type(right))
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ def test_from_sequence_copy(self):

result = Categorical._from_sequence(cat, dtype=None, copy=True)

assert not np.shares_memory(result._codes, cat._codes)
assert not tm.shares_memory(result, cat)

@pytest.mark.xfail(
not IS64 or is_platform_windows(),
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/arrays/floating/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,5 @@ def test_unary_float_operators(float_ea_dtype, source, neg_target, abs_target):

tm.assert_extension_array_equal(neg_result, neg_target)
tm.assert_extension_array_equal(pos_result, arr)
assert not np.shares_memory(pos_result._data, arr._data)
assert not np.shares_memory(pos_result._mask, arr._mask)
assert not tm.shares_memory(pos_result, arr)
tm.assert_extension_array_equal(abs_result, abs_target)
6 changes: 2 additions & 4 deletions pandas/tests/arrays/floating/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,7 @@ def test_astype_copy():
# copy=True -> ensure both data and mask are actual copies
result = arr.astype("Float64", copy=True)
assert result is not arr
assert not np.shares_memory(result._data, arr._data)
assert not np.shares_memory(result._mask, arr._mask)
assert not tm.shares_memory(result, arr)
result[0] = 10
tm.assert_extension_array_equal(arr, orig)
result[0] = pd.NA
Expand All @@ -101,8 +100,7 @@ def test_astype_copy():
orig = pd.array([0.1, 0.2, None], dtype="Float64")

result = arr.astype("Float32", copy=False)
assert not np.shares_memory(result._data, arr._data)
assert not np.shares_memory(result._mask, arr._mask)
assert not tm.shares_memory(result, arr)
result[0] = 10
tm.assert_extension_array_equal(arr, orig)
result[0] = pd.NA
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,5 @@ def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_ta

tm.assert_extension_array_equal(neg_result, neg_target)
tm.assert_extension_array_equal(pos_result, arr)
assert not np.shares_memory(pos_result._data, arr._data)
assert not np.shares_memory(pos_result._mask, arr._mask)
assert not tm.shares_memory(pos_result, arr)
tm.assert_extension_array_equal(abs_result, abs_target)
6 changes: 2 additions & 4 deletions pandas/tests/arrays/integer/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,7 @@ def test_astype_copy():
# copy=True -> ensure both data and mask are actual copies
result = arr.astype("Int64", copy=True)
assert result is not arr
assert not np.shares_memory(result._data, arr._data)
assert not np.shares_memory(result._mask, arr._mask)
assert not tm.shares_memory(result, arr)
result[0] = 10
tm.assert_extension_array_equal(arr, orig)
result[0] = pd.NA
Expand All @@ -176,8 +175,7 @@ def test_astype_copy():
orig = pd.array([1, 2, 3, None], dtype="Int64")

result = arr.astype("Int32", copy=False)
assert not np.shares_memory(result._data, arr._data)
assert not np.shares_memory(result._mask, arr._mask)
assert not tm.shares_memory(result, arr)
result[0] = 10
tm.assert_extension_array_equal(arr, orig)
result[0] = pd.NA
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/arrays/masked_shared.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Tests shared by MaskedArray subclasses.
"""
import numpy as np

import pandas as pd
import pandas._testing as tm
Expand Down Expand Up @@ -56,7 +55,7 @@ class NumericOps:

def test_no_shared_mask(self, data):
result = data + 1
assert np.shares_memory(result._mask, data._mask) is False
assert not tm.shares_memory(result, data)

def test_array(self, comparison_op, dtype):
op = comparison_op
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,15 @@ def test_array_copy():
a = np.array([1, 2])
# default is to copy
b = pd.array(a, dtype=a.dtype)
assert np.shares_memory(a, b._ndarray) is False
assert not tm.shares_memory(a, b)

# copy=True
b = pd.array(a, dtype=a.dtype, copy=True)
assert np.shares_memory(a, b._ndarray) is False
assert not tm.shares_memory(a, b)

# copy=False
b = pd.array(a, dtype=a.dtype, copy=False)
assert np.shares_memory(a, b._ndarray) is True
assert tm.shares_memory(a, b)


cet = pytz.timezone("CET")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_constructor_copy():
arr = np.array([0, 1])
result = PandasArray(arr, copy=True)

assert np.shares_memory(result._ndarray, arr) is False
assert not tm.shares_memory(result, arr)


def test_constructor_with_data(any_numpy_array):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ def test_pos(self):

result = +arr
tm.assert_timedelta_array_equal(result, arr)
assert not np.shares_memory(result._ndarray, arr._ndarray)
assert not tm.shares_memory(result, arr)

result2 = np.positive(arr)
tm.assert_timedelta_array_equal(result2, arr)
assert not np.shares_memory(result2._ndarray, arr._ndarray)
assert not tm.shares_memory(result2, arr)

def test_neg(self):
vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ def _check_inplace(self, is_inplace, orig, arr, obj):
if arr.dtype.kind in ["m", "M"]:
# We may not have the same DTA/TDA, but will have the same
# underlying data
assert arr._data is obj._values._data
assert arr._ndarray is obj._values._ndarray
else:
assert obj._values is arr
else:
Expand Down