Skip to content

CoW: Ignore copy=True when copy_on_write is enabled #51464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 8, 2023
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11095,7 +11095,7 @@ def to_timestamp(
>>> df2.index
DatetimeIndex(['2023-01-31', '2024-01-31'], dtype='datetime64[ns]', freq=None)
"""
new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())

axis_name = self._get_axis_name(axis)
old_ax = getattr(self, axis_name)
Expand Down Expand Up @@ -11152,7 +11152,7 @@ def to_period(
>>> idx.to_period("Y")
PeriodIndex(['2001', '2002', '2003'], dtype='period[A-DEC]')
"""
new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())

axis_name = self._get_axis_name(axis)
old_ax = getattr(self, axis_name)
Expand Down
33 changes: 23 additions & 10 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def set_flags(
>>> df2.flags.allows_duplicate_labels
False
"""
df = self.copy(deep=copy)
df = self.copy(deep=copy and not using_copy_on_write())
if allows_duplicate_labels is not None:
df.flags["allows_duplicate_labels"] = allows_duplicate_labels
return df
Expand Down Expand Up @@ -713,7 +713,7 @@ def _set_axis_nocheck(
else:
# With copy=False, we create a new object but don't copy the
# underlying data.
obj = self.copy(deep=copy)
obj = self.copy(deep=copy and not using_copy_on_write())
setattr(obj, obj._get_axis_name(axis), labels)
return obj

Expand Down Expand Up @@ -742,7 +742,7 @@ def swapaxes(
j = self._get_axis_number(axis2)

if i == j:
return self.copy(deep=copy)
return self.copy(deep=copy and not using_copy_on_write())

mapping = {i: j, j: i}

Expand Down Expand Up @@ -999,7 +999,7 @@ def _rename(
index = mapper

self._check_inplace_and_allows_duplicate_labels(inplace)
result = self if inplace else self.copy(deep=copy)
result = self if inplace else self.copy(deep=copy and not using_copy_on_write())

for axis_no, replacements in enumerate((index, columns)):
if replacements is None:
Expand Down Expand Up @@ -1215,6 +1215,9 @@ class name

inplace = validate_bool_kwarg(inplace, "inplace")

if copy and using_copy_on_write():
copy = False

if mapper is not lib.no_default:
# Use v0.23 behavior if a scalar or list
non_mapper = is_scalar(mapper) or (
Expand Down Expand Up @@ -5331,6 +5334,8 @@ def reindex(

# if all axes that are requested to reindex are equal, then only copy
# if indicated must have index names equal here as well as values
if copy and using_copy_on_write():
copy = False
if all(
self._get_axis(axis_name).identical(ax)
for axis_name, ax in axes.items()
Expand Down Expand Up @@ -5425,10 +5430,14 @@ def _reindex_with_indexers(
# If we've made a copy once, no need to make another one
copy = False

if (copy or copy is None) and new_data is self._mgr:
if (
(copy or copy is None)
and new_data is self._mgr
and not using_copy_on_write()
):
new_data = new_data.copy(deep=copy)
elif using_copy_on_write() and new_data is self._mgr:
new_data = new_data.copy(deep=copy)
new_data = new_data.copy(deep=False)

return self._constructor(new_data).__finalize__(self)

Expand Down Expand Up @@ -6293,6 +6302,9 @@ def astype(
2 2020-01-03
dtype: datetime64[ns]
"""
if copy and using_copy_on_write():
copy = False

if is_dict_like(dtype):
if self.ndim == 1: # i.e. Series
if len(dtype) > 1 or self.name not in dtype:
Expand Down Expand Up @@ -9551,6 +9563,8 @@ def _align_series(
fill_axis: Axis = 0,
):
is_series = isinstance(self, ABCSeries)
if copy and using_copy_on_write():
copy = False

if (not is_series and axis is None) or axis not in [None, 0, 1]:
raise ValueError("Must specify axis=0 or 1")
Expand Down Expand Up @@ -10319,8 +10333,7 @@ def truncate(
if isinstance(ax, MultiIndex):
setattr(result, self._get_axis_name(axis), ax.truncate(before, after))

if copy or (copy is None and not using_copy_on_write()):
result = result.copy(deep=copy)
result = result.copy(deep=copy and not using_copy_on_write())

return result

Expand Down Expand Up @@ -10401,7 +10414,7 @@ def _tz_convert(ax, tz):
raise ValueError(f"The level {level} is not valid")
ax = _tz_convert(ax, tz)

result = self.copy(deep=copy)
result = self.copy(deep=copy and not using_copy_on_write())
result = result.set_axis(ax, axis=axis, copy=False)
return result.__finalize__(self, method="tz_convert")

Expand Down Expand Up @@ -10583,7 +10596,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent):
raise ValueError(f"The level {level} is not valid")
ax = _tz_localize(ax, tz, ambiguous, nonexistent)

result = self.copy(deep=copy)
result = self.copy(deep=copy and not using_copy_on_write())
result = result.set_axis(ax, axis=axis, copy=False)
return result.__finalize__(self, method="tz_localize")

Expand Down
4 changes: 4 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,8 @@ def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") ->
copy = False
else:
copy = True
elif using_copy_on_write():
copy = False

return self.apply(
"astype",
Expand All @@ -459,6 +461,8 @@ def convert(self: T, copy: bool | None) -> T:
copy = False
else:
copy = True
elif using_copy_on_write():
copy = False

return self.apply("convert", copy=copy, using_cow=using_copy_on_write())

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,8 @@ def concat(
copy = False
else:
copy = True
elif copy and using_copy_on_write():
copy = False

op = _Concatenator(
objs,
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4099,7 +4099,7 @@ def swaplevel(
{examples}
"""
assert isinstance(self.index, MultiIndex)
result = self.copy(deep=copy)
result = self.copy(deep=copy and not using_copy_on_write())
result.index = self.index.swaplevel(i, j)
return result

Expand Down Expand Up @@ -5713,7 +5713,7 @@ def to_timestamp(
if not isinstance(self.index, PeriodIndex):
raise TypeError(f"unsupported Type {type(self.index).__name__}")

new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())
new_index = self.index.to_timestamp(freq=freq, how=how)
setattr(new_obj, "index", new_index)
return new_obj
Expand Down Expand Up @@ -5753,7 +5753,7 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series
if not isinstance(self.index, DatetimeIndex):
raise TypeError(f"unsupported Type {type(self.index).__name__}")

new_obj = self.copy(deep=copy)
new_obj = self.copy(deep=copy and not using_copy_on_write())
new_index = self.index.to_period(freq=freq)
setattr(new_obj, "index", new_index)
return new_obj
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/copy_view/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,21 @@ def test_concat_mixed_series_frame(using_copy_on_write):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("copy", [True, None, False])
def test_concat_copy_keyword(using_copy_on_write, copy):
df = DataFrame({"a": [1, 2]})
df2 = DataFrame({"b": [1.5, 2.5]})

result = concat([df, df2], axis=1, copy=copy)

if using_copy_on_write or copy is False:
assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
else:
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.parametrize(
"func",
[
Expand Down Expand Up @@ -280,3 +295,18 @@ def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
tm.assert_frame_equal(df1, df1_orig)
tm.assert_frame_equal(df2, df2_orig)


@pytest.mark.parametrize("copy", [True, None, False])
def test_merge_copy_keyword(using_copy_on_write, copy):
df = DataFrame({"a": [1, 2]})
df2 = DataFrame({"b": [3, 4.5]})

result = df.merge(df2, copy=copy, left_index=True, right_index=True)

if using_copy_on_write or copy is False:
assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
else:
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
91 changes: 83 additions & 8 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def test_copy_shallow(using_copy_on_write):
lambda df, copy: df.rename(columns=str.lower, copy=copy),
lambda df, copy: df.reindex(columns=["a", "c"], copy=copy),
lambda df, copy: df.reindex_like(df, copy=copy),
lambda df, copy: df.align(df, copy=copy)[0],
lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy),
lambda df, copy: df.rename_axis(index="test", copy=copy),
lambda df, copy: df.rename_axis(columns="test", copy=copy),
Expand All @@ -84,6 +85,7 @@ def test_copy_shallow(using_copy_on_write):
"rename",
"reindex",
"reindex_like",
"align",
"set_axis",
"rename_axis0",
"rename_axis1",
Expand Down Expand Up @@ -115,22 +117,96 @@ def test_methods_copy_keyword(
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index)
df2 = method(df, copy=copy)

share_memory = (using_copy_on_write and copy is not True) or copy is False
share_memory = using_copy_on_write or copy is False

if request.node.callspec.id.startswith("reindex-"):
# TODO copy=False without CoW still returns a copy in this case
if not using_copy_on_write and not using_array_manager and copy is False:
share_memory = False
# TODO copy=True with CoW still returns a view
if using_copy_on_write:
share_memory = True

if share_memory:
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
else:
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))


@pytest.mark.parametrize("copy", [True, None, False])
@pytest.mark.parametrize(
"method",
[
lambda ser, copy: ser.rename(index={0: 100}, copy=copy),
lambda ser, copy: ser.reindex(index=ser.index, copy=copy),
lambda ser, copy: ser.reindex_like(ser, copy=copy),
lambda ser, copy: ser.align(ser, copy=copy)[0],
lambda ser, copy: ser.set_axis(["a", "b", "c"], axis="index", copy=copy),
lambda ser, copy: ser.rename_axis(index="test", copy=copy),
lambda ser, copy: ser.astype("int64", copy=copy),
lambda ser, copy: ser.swaplevel(0, 1, copy=copy),
lambda ser, copy: ser.swapaxes(0, 0, copy=copy),
lambda ser, copy: ser.truncate(0, 5, copy=copy),
lambda ser, copy: ser.infer_objects(copy=copy),
lambda ser, copy: ser.to_timestamp(copy=copy),
lambda ser, copy: ser.to_period(freq="D", copy=copy),
lambda ser, copy: ser.tz_localize("US/Central", copy=copy),
lambda ser, copy: ser.tz_convert("US/Central", copy=copy),
lambda ser, copy: ser.set_flags(allows_duplicate_labels=False, copy=copy),
],
ids=[
"rename",
"reindex",
"reindex_like",
"align",
"set_axis",
"rename_axis0",
"astype",
"swaplevel",
"swapaxes",
"truncate",
"infer_objects",
"to_timestamp",
"to_period",
"tz_localize",
"tz_convert",
"set_flags",
],
)
def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write):
index = None
if "to_timestamp" in request.node.callspec.id:
index = period_range("2012-01-01", freq="D", periods=3)
elif "to_period" in request.node.callspec.id:
index = date_range("2012-01-01", freq="D", periods=3)
elif "tz_localize" in request.node.callspec.id:
index = date_range("2012-01-01", freq="D", periods=3)
elif "tz_convert" in request.node.callspec.id:
index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels")
elif "swaplevel" in request.node.callspec.id:
index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]])

ser = Series([1, 2, 3], index=index)
ser2 = method(ser, copy=copy)

share_memory = using_copy_on_write or copy is False

if share_memory:
assert np.shares_memory(get_array(ser2), get_array(ser))
else:
assert not np.shares_memory(get_array(ser2), get_array(ser))


@pytest.mark.parametrize("copy", [True, None, False])
def test_transpose_copy_keyword(using_copy_on_write, copy, using_array_manager):
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
result = df.transpose(copy=copy)
share_memory = using_copy_on_write or copy is False or copy is None
share_memory = share_memory and not using_array_manager

if share_memory:
assert np.shares_memory(get_array(df, "a"), get_array(result, 0))
else:
assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))


# -----------------------------------------------------------------------------
# DataFrame methods returning new DataFrame using shallow copy

Expand Down Expand Up @@ -1119,14 +1195,13 @@ def test_set_flags(using_copy_on_write):
tm.assert_series_equal(ser, expected)


@pytest.mark.parametrize("copy_kwargs", [{"copy": True}, {}])
@pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}])
def test_rename_axis(using_copy_on_write, kwargs, copy_kwargs):
def test_rename_axis(using_copy_on_write, kwargs):
df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a"))
df_orig = df.copy()
df2 = df.rename_axis(**kwargs, **copy_kwargs)
df2 = df.rename_axis(**kwargs)

if using_copy_on_write and not copy_kwargs:
if using_copy_on_write:
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
else:
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ def test_reindex_copies_ea(self, using_copy_on_write):

# pass both columns and index
result2 = df.reindex(columns=cols, index=df.index, copy=True)
assert not np.shares_memory(result2[0].array._data, df[0].array._data)
if using_copy_on_write:
assert np.shares_memory(result2[0].array._data, df[0].array._data)
else:
assert not np.shares_memory(result2[0].array._data, df[0].array._data)

@td.skip_array_manager_not_yet_implemented
def test_reindex_date_fill_value(self):
Expand Down
Loading