pandas-dev · lithomas1 · Jan 7, 2023 · Jan 7, 2023 · Jan 9, 2023 · Jan 10, 2023
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5530,7 +5530,7 @@ def _replace_columnwise(
         DataFrame or None
         """
         # Operate column-wise
-        res = self if inplace else self.copy()
+        res = self if inplace else self.copy(deep=None)
         ax = self.columns
 
         for i, ax_value in enumerate(ax):

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7046,6 +7046,7 @@ def replace(
                 to_replace = [to_replace]
 
             if isinstance(to_replace, (tuple, list)):
+                # TODO: Consider copy-on-write for non-replaced columns's here
                 if isinstance(self, ABCDataFrame):
                     from pandas import Series
 
@@ -7105,7 +7106,7 @@ def replace(
             if not self.size:
                 if inplace:
                     return None
-                return self.copy()
+                return self.copy(deep=None)
 
             if is_dict_like(to_replace):
                 if is_dict_like(value):  # {'A' : NA} -> {'A' : 0}

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1220,8 +1220,81 @@ def value_getitem(placement):
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
                 if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
-                    blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True)
-                    self._clear_reference_block(blkno_l)
+                    leftover_blocks = tuple(blk.delete(blk_locs))
+                    vals = np.broadcast_to(
+                        value_getitem(val_locs), blk.values[blk_locs].shape
+                    )
+
+                    # Fill before the first leftover block
+                    nbs = []
+                    leftover_start_i = leftover_blocks[0].mgr_locs.as_slice.start
+
+                    if leftover_start_i != 0:
+                        nbs.append(
+                            new_block_2d(
+                                vals[:leftover_start_i],
+                                placement=BlockPlacement(slice(0, leftover_start_i)),
+                            )
+                        )
+
+                    # Every hole in between leftover blocks is where we need to insert
+                    # a new block
+                    curr_idx = leftover_start_i
+                    for i in range(len(leftover_blocks) - 1):
+                        curr_block = leftover_blocks[i]
+                        next_block = leftover_blocks[i + 1]
+
+                        curr_end = curr_block.mgr_locs.as_slice.stop
+                        next_start = next_block.mgr_locs.as_slice.start
+
+                        num_to_fill = next_start - curr_end
+                        nb = new_block_2d(
+                            vals[curr_idx : curr_idx + num_to_fill],
+                            placement=BlockPlacement(
+                                slice(curr_end, curr_end + num_to_fill)
+                            ),
+                        )
+                        nbs.append(nb)
+                        curr_idx += num_to_fill
+
+                    # Fill after the last leftover block
+                    last_del_loc = blk_locs[-1] + 1
+                    last_leftover_loc = leftover_blocks[-1].mgr_locs.as_slice.stop
+                    if last_del_loc > last_leftover_loc:
+                        diff = last_del_loc - last_leftover_loc
+                        nbs.append(
+                            new_block_2d(
+                                vals[curr_idx : curr_idx + diff],
+                                placement=BlockPlacement(
+                                    slice(last_leftover_loc, last_del_loc)
+                                ),
+                            )
+                        )
+                    # Add new block where old block was and remaining blocks at
+                    # the end to avoid updating all block numbers
+                    old_blocks = self.blocks
+                    nbs = tuple(nbs)
+                    nb = nbs[0]
+                    new_blocks = (
+                        old_blocks[:blkno_l]
+                        + (nb,)
+                        + old_blocks[blkno_l + 1 :]
+                        + leftover_blocks
+                        + tuple(nbs[1:])
+                    )
+                    if self.refs is not None:
+                        prev_refs = self.refs[blkno_l]
+                        extra_refs = [prev_refs] * len(leftover_blocks)
+                        self.refs[blkno_l] = None
+                        self.refs += extra_refs
+                        if len(blk_locs) > 1:
+                            # Add the refs for the other deleted blocks
+                            self.refs += [None] * (len(blk_locs) - 1)
+                    self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
+                    for i, nb in enumerate(leftover_blocks + nbs[1:]):
+                        self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb))
+                        self._blknos[nb.mgr_locs.indexer] = i + len(old_blocks)
+                    self.blocks = new_blocks
                 else:
                     blk.set_inplace(blk_locs, value_getitem(val_locs))
             else:

diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 from pandas import DataFrame
+import pandas._testing as tm
 from pandas.tests.copy_view.util import get_array
 
 
@@ -93,3 +94,47 @@ def test_switch_options():
         subset.iloc[0, 0] = 0
         # df updated with CoW disabled
         assert df.iloc[0, 0] == 0
+
+
+@td.skip_array_manager_invalid_test
+@pytest.mark.parametrize(
+    "locs, arr",
+    [
+        ([0], np.array([-1, -2, -3])),
+        ([1], np.array([-1, -2, -3])),
+        ([5], np.array([-1, -2, -3])),
+        ([0, 1], np.array([-1, -2, -3])),
+        ([0, 2], np.array([-1, -2, -3])),
+        ([0, 1, 2], np.array([-1, -2, -3])),
+        ([1, 2], np.array([-1, -2, -3])),
+        ([1, 3], np.array([-1, -2, -3])),
+        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
+    ],
+)
+def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr):
+    # Nothing currently calls iset with
+    # more than 1 loc with inplace=True (only happens with inplace=False)
+    # but ensure that it works
+    df = DataFrame(
+        {
+            "a": [1, 2, 3],
+            "b": [4, 5, 6],
+            "c": [7, 8, 9],
+            "d": [10, 11, 12],
+            "e": [13, 14, 15],
+            "f": ["foo", "bar", "baz"],
+        }
+    )
+    df_orig = df.copy()
+    df2 = df.copy(deep=None)  # Trigger a CoW (if enabled, otherwise makes copy)
+    df2._mgr.iset(locs, arr, inplace=True)
+
+    tm.assert_frame_equal(df, df_orig)
+
+    if using_copy_on_write:
+        for i, col in enumerate(df.columns):
+            if i not in locs:
+                assert np.shares_memory(get_array(df, col), get_array(df2, col))
+    else:
+        for col in df.columns:
+            assert not np.shares_memory(get_array(df, col), get_array(df2, col))
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
@@ -903,6 +903,40 @@ def test_squeeze(using_copy_on_write):
         assert df.loc[0, "a"] == 0
 
 
+@pytest.mark.parametrize(
+    "replace_kwargs",
+    [
+        {"to_replace": {"a": 1, "b": 4}, "value": -1},
+        # Test CoW splits blocks to avoid copying unchanged columns
+        {"to_replace": {"a": 1}, "value": -1},
+        {"to_replace": {"b": 4}, "value": -1},
+        {"to_replace": {"b": {4: 1}}},
+        # TODO: Add these in a further optimization
+        # We would need to see which columns got replaced in the mask
+        # which could be expensive
+        # {"to_replace": {"b": 1}},
+        # 1
+    ],
+)
+def test_replace(using_copy_on_write, replace_kwargs):
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]})
+    df_orig = df.copy()
+
+    df_replaced = df.replace(**replace_kwargs)
+
+    if using_copy_on_write:
+        if (df_replaced["b"] == df["b"]).all():
+            assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b"))
+        assert np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c"))
+
+    # mutating squeezed df triggers a copy-on-write for that column/block
+    df_replaced.loc[0, "c"] = -1
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "a"))
+
+    tm.assert_frame_equal(df, df_orig)
+
+
 def test_putmask(using_copy_on_write):
     df = DataFrame({"a": [1, 2], "b": 1, "c": 2})
     view = df[:]