Skip to content

CoW: Remove remaining cow occurrences from tests #57477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1987,14 +1987,6 @@ def indexer_ial(request):
return request.param


@pytest.fixture
def using_copy_on_write() -> bool:
    """Report whether Copy-on-Write is enabled; always ``True`` here."""
    return True


@pytest.fixture
def using_infer_string() -> bool:
"""
Expand Down
268 changes: 91 additions & 177 deletions pandas/tests/copy_view/test_functions.py

Large diffs are not rendered by default.

942 changes: 300 additions & 642 deletions pandas/tests/copy_view/test_methods.py

Large diffs are not rendered by default.

10 changes: 0 additions & 10 deletions pandas/tests/generic/test_duplicate_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,6 @@ def test_preserve_getitem(self):
assert df.loc[[0]].flags.allows_duplicate_labels is False
assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False

def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write):
    # NDFrame.__getitem__ caches the first df['A'] lookup; without CoW it is
    # unclear whether that cached entry should be invalidated or updated, so
    # mark the non-CoW mode as an expected failure.
    if not using_copy_on_write:
        request.applymarker(pytest.mark.xfail(reason="Unclear behavior."))
    frame = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
    assert frame["A"].flags.allows_duplicate_labels is False
    # Flipping the flag on the frame should be reflected by a fresh column
    # access rather than the stale cached Series.
    frame.flags.allows_duplicate_labels = True
    assert frame["A"].flags.allows_duplicate_labels is True

@pytest.mark.parametrize(
"objs, kwargs",
[
Expand Down
7 changes: 2 additions & 5 deletions pandas/tests/indexes/period/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


class TestPeriodIndex:
def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write):
def test_getitem_periodindex_duplicates_string_slice(self):
# monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="Y-JUN")
ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)
Expand All @@ -22,10 +22,7 @@ def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write):
expected = ts[1:3]
tm.assert_series_equal(result, expected)
result[:] = 1
if using_copy_on_write:
tm.assert_series_equal(ts, original)
else:
assert (ts[1:3] == 1).all()
tm.assert_series_equal(ts, original)

# not monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="Y-JUN")
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/indexes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

class TestCommon:
@pytest.mark.parametrize("name", [None, "new_name"])
def test_to_frame(self, name, index_flat, using_copy_on_write):
def test_to_frame(self, name, index_flat):
# see GH#15230, GH#22580
idx = index_flat

Expand All @@ -46,8 +46,6 @@ def test_to_frame(self, name, index_flat, using_copy_on_write):
assert df.index is idx
assert len(df.columns) == 1
assert df.columns[0] == idx_name
if not using_copy_on_write:
assert df[idx_name].values is not idx.values

df = idx.to_frame(index=False, name=idx_name)
assert df.index is not idx
Expand Down
32 changes: 10 additions & 22 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ def test_reindex_items(self):
mgr.iget(3).internal_values(), reindexed.iget(3).internal_values()
)

def test_get_numeric_data(self, using_copy_on_write):
def test_get_numeric_data(self):
mgr = create_mgr(
"int: int; float: float; complex: complex;"
"str: object; bool: bool; obj: object; dt: datetime",
Expand All @@ -774,18 +774,12 @@ def test_get_numeric_data(self, using_copy_on_write):
np.array([100.0, 200.0, 300.0]),
inplace=True,
)
if using_copy_on_write:
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([1.0, 1.0, 1.0]),
)
else:
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([100.0, 200.0, 300.0]),
)
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([1.0, 1.0, 1.0]),
)

def test_get_bool_data(self, using_copy_on_write):
def test_get_bool_data(self):
mgr = create_mgr(
"int: int; float: float; complex: complex;"
"str: object; bool: bool; obj: object; dt: datetime",
Expand All @@ -801,16 +795,10 @@ def test_get_bool_data(self, using_copy_on_write):
)

bools.iset(0, np.array([True, False, True]), inplace=True)
if using_copy_on_write:
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, True, True]),
)
else:
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, False, True]),
)
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, True, True]),
)

def test_unicode_repr_doesnt_raise(self):
repr(create_mgr("b,\u05d0: object"))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):


@skip_pyarrow # ParserError: Empty CSV file
def test_file_descriptor_leak(all_parsers, using_copy_on_write):
def test_file_descriptor_leak(all_parsers):
# GH 31488
parser = all_parsers
with tm.ensure_clean() as path:
Expand Down
41 changes: 5 additions & 36 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas._config import using_copy_on_write

from pandas.compat import is_platform_windows
from pandas.compat.pyarrow import (
pa_version_under11p0,
Expand Down Expand Up @@ -425,15 +423,10 @@ def test_read_filters(self, engine, tmp_path):
repeat=1,
)

def test_write_index(self, engine, using_copy_on_write, request):
check_names = engine != "fastparquet"
if using_copy_on_write and engine == "fastparquet":
request.applymarker(
pytest.mark.xfail(reason="fastparquet write into index")
)

def test_write_index(self):
pytest.importorskip("pyarrow")
df = pd.DataFrame({"A": [1, 2, 3]})
check_round_trip(df, engine)
check_round_trip(df, "pyarrow")

indexes = [
[2, 3, 4],
Expand All @@ -446,12 +439,12 @@ def test_write_index(self, engine, using_copy_on_write, request):
df.index = index
if isinstance(index, pd.DatetimeIndex):
df.index = df.index._with_freq(None) # freq doesn't round-trip
check_round_trip(df, engine, check_names=check_names)
check_round_trip(df, "pyarrow")

# index with meta-data
df.index = [0, 1, 2]
df.index.name = "foo"
check_round_trip(df, engine)
check_round_trip(df, "pyarrow")

def test_write_multiindex(self, pa):
# Not supported in fastparquet as of 0.1.3 or older pyarrow version
Expand Down Expand Up @@ -1256,23 +1249,6 @@ def test_error_on_using_partition_cols_and_partition_on(
partition_cols=partition_cols,
)

@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
def test_empty_dataframe(self, fp):
    # GH #27339: a completely empty frame must survive a fastparquet round trip.
    frame = pd.DataFrame()
    check_round_trip(frame, fp, expected=frame.copy())

@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
def test_timezone_aware_index(self, fp, timezone_aware_date_list):
    # Repeat the tz-aware timestamp so the frame has a multi-row aware index.
    dates = [timezone_aware_date_list] * 5

    frame = pd.DataFrame(index=dates, data={"index_as_col": dates})

    # fastparquet names a previously unnamed index "index" on round trip.
    expected = frame.copy()
    expected.index.name = "index"
    check_round_trip(frame, fp, expected=expected)

def test_close_file_handle_on_read_error(self):
with tm.ensure_clean("test.parquet") as path:
pathlib.Path(path).write_bytes(b"breakit")
Expand Down Expand Up @@ -1361,10 +1337,3 @@ def test_invalid_dtype_backend(self, engine):
df.to_parquet(path)
with pytest.raises(ValueError, match=msg):
read_parquet(path, dtype_backend="numpy")

@pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
def test_empty_columns(self, fp):
    # GH 52034: a frame with rows but zero columns keeps its named index.
    frame = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
    expected = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
    check_round_trip(frame, fp, expected=expected)
12 changes: 3 additions & 9 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,26 +36,20 @@ def test_reindex(self, multiindex_dataframe_random_data):
tm.assert_frame_equal(reindexed, expected)

def test_reindex_preserve_levels(
self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write
self, multiindex_year_month_day_dataframe_random_data
):
ymd = multiindex_year_month_day_dataframe_random_data

new_index = ymd.index[::10]
chunk = ymd.reindex(new_index)
if using_copy_on_write:
assert chunk.index.is_(new_index)
else:
assert chunk.index is new_index
assert chunk.index.is_(new_index)

chunk = ymd.loc[new_index]
assert chunk.index.equals(new_index)

ymdT = ymd.T
chunk = ymdT.reindex(columns=new_index)
if using_copy_on_write:
assert chunk.columns.is_(new_index)
else:
assert chunk.columns is new_index
assert chunk.columns.is_(new_index)

chunk = ymdT.loc[:, new_index]
assert chunk.columns.equals(new_index)
Expand Down