Skip to content

BUG: loc casting to object for multi block case when setting with list indexer #49161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ Interval
Indexing
^^^^^^^^
- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
- Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`)
- Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`)
- Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`)
Expand Down
12 changes: 8 additions & 4 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1898,16 +1898,20 @@ def _setitem_with_indexer_2d_value(self, indexer, value):

ilocs = self._ensure_iterable_column_indexer(indexer[1])

# GH#7551 Note that this coerces the dtype if we are mixed
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the GH reference here worth keeping somewhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think so, since we are avoiding this now

value = np.array(value, dtype=object)
if not is_array_like(value):
# cast lists to array
value = np.array(value, dtype=object)
if len(ilocs) != value.shape[1]:
raise ValueError(
"Must have equal len keys and value when setting with an ndarray"
)

for i, loc in enumerate(ilocs):
# setting with a list, re-coerces
self._setitem_single_column(loc, value[:, i].tolist(), pi)
value_col = value[:, i]
if is_object_dtype(value_col.dtype):
# casting to list so that we do type inference in setitem_single_column
value_col = value_col.tolist()
self._setitem_single_column(loc, value_col, pi)

def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
ilocs = self._ensure_iterable_column_indexer(indexer[1])
Expand Down
11 changes: 2 additions & 9 deletions pandas/tests/frame/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@


class TestDataFrameSetitemCoercion:
@pytest.mark.xfail(reason="Unnecessary cast.")
@pytest.mark.parametrize("consolidate", [True, False])
def test_loc_setitem_multiindex_columns(self, consolidate):
# GH#18415 Setting values in a single column preserves dtype,
Expand All @@ -36,9 +35,7 @@ def test_loc_setitem_multiindex_columns(self, consolidate):
A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()

msg = "will attempt to set the values inplace instead"
with tm.assert_produces_warning(FutureWarning, match=msg):
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)

assert (A.dtypes == np.float32).all()

Expand Down Expand Up @@ -119,7 +116,6 @@ def test_15231():
tm.assert_series_equal(df.dtypes, exp_dtypes)


@pytest.mark.xfail(reason="Unnecessarily upcasts to float64")
def test_iloc_setitem_unnecesssary_float_upcasting():
# GH#12255
df = DataFrame(
Expand All @@ -132,10 +128,7 @@ def test_iloc_setitem_unnecesssary_float_upcasting():
orig = df.copy()

values = df[0].values.reshape(2, 1)

msg = "will attempt to set the values inplace instead"
with tm.assert_produces_warning(FutureWarning, match=msg):
df.iloc[:, 0:1] = values
df.iloc[:, 0:1] = values

tm.assert_frame_equal(df, orig)

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1405,6 +1405,19 @@ def test_loc_named_tuple_for_midx(self):
)
tm.assert_frame_equal(result, expected)

# ``indexer`` is the column key passed to ``.loc``: once as a list of labels
# and once as a single scalar label.
@pytest.mark.parametrize("indexer", [["a"], "a"])
# ``col`` optionally adds an integer column "b" next to the datetime column.
# NOTE(review): presumably the extra column is what exercises the
# multi-block case named in the PR title -- confirm.
@pytest.mark.parametrize("col", [{}, {"b": 1}])
def test_set_2d_casting_date_to_int(self, col, indexer):
# GH#49159
# Regression test: setting through ``.loc`` with a list row indexer should
# leave column "a" holding Timestamps instead of coercing its dtype.
df = DataFrame(
{"a": [Timestamp("2022-12-29"), Timestamp("2022-12-30")], **col},
)
# The right-hand side is the whole "a" column shifted by one day; only
# row 1 is selected by the row indexer ``[1]``.
df.loc[[1], indexer] = df["a"] + pd.Timedelta(days=1)
# Row 0 is unchanged; row 1 moves from 2022-12-30 to 2022-12-31.
expected = DataFrame(
{"a": [Timestamp("2022-12-29"), Timestamp("2022-12-31")], **col},
)
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("col", [{}, {"name": "a"}])
def test_loc_setitem_reordering_with_all_true_indexer(self, col):
# GH#48701
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def test_multi_assign(self):
{
"FC": ["a", np.nan, "a", "b", "a", "b"],
"PF": [0, 0, 0, 0, 1, 1],
"col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0],
"col1": [0, 1, 4, 6, 8, 10],
"col2": [12, 7, 16, np.nan, 20, 22],
}
)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1500,12 +1500,12 @@ def test_loc_setitem_unsorted_multiindex_columns(self, key):
mi = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")])
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
obj = df.copy()
obj.loc[:, key] = np.zeros((2, 2), dtype=int)
obj.loc[:, key] = np.zeros((2, 2), dtype="int64")
expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=mi)
tm.assert_frame_equal(obj, expected)

df = df.sort_index(axis=1)
df.loc[:, key] = np.zeros((2, 2), dtype=int)
df.loc[:, key] = np.zeros((2, 2), dtype="int64")
expected = expected.sort_index(axis=1)
tm.assert_frame_equal(df, expected)

Expand Down