Skip to content

Commit c1234db

Browse files
authored
CLN: Stopped dtype inference in sanitize_array with Index[object] (#58655)
1 parent 6694b79 commit c1234db

File tree

4 files changed

+9
-35
lines changed

4 files changed

+9
-35
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Removal of prior version deprecations/changes
244244
- Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`)
245245
- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
246246
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
247+
- Stopped performing dtype inference when setting an :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
247248
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
248249
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
249250
- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)

pandas/core/construction.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
ensure_object,
3939
is_list_like,
4040
is_object_dtype,
41-
is_string_dtype,
4241
pandas_dtype,
4342
)
4443
from pandas.core.dtypes.dtypes import NumpyEADtype
@@ -555,9 +554,7 @@ def sanitize_array(
555554
# Avoid ending up with a NumpyExtensionArray
556555
dtype = dtype.numpy_dtype
557556

558-
object_index = False
559-
if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
560-
object_index = True
557+
data_was_index = isinstance(data, ABCIndex)
561558

562559
# extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
563560
data = extract_array(data, extract_numpy=True, extract_range=True)
@@ -610,15 +607,8 @@ def sanitize_array(
610607

611608
if dtype is None:
612609
subarr = data
613-
if data.dtype == object:
610+
if data.dtype == object and not data_was_index:
614611
subarr = maybe_infer_to_datetimelike(data)
615-
if (
616-
object_index
617-
and using_pyarrow_string_dtype()
618-
and is_string_dtype(subarr)
619-
):
620-
# Avoid inference when string option is set
621-
subarr = data
622612
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
623613
from pandas.core.arrays.string_ import StringDtype
624614

pandas/core/frame.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5059,22 +5059,7 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]:
50595059

50605060
if is_list_like(value):
50615061
com.require_length_match(value, self.index)
5062-
arr = sanitize_array(value, self.index, copy=True, allow_2d=True)
5063-
if (
5064-
isinstance(value, Index)
5065-
and value.dtype == "object"
5066-
and arr.dtype != value.dtype
5067-
): #
5068-
# TODO: Remove kludge in sanitize_array for string mode when enforcing
5069-
# this deprecation
5070-
warnings.warn(
5071-
"Setting an Index with object dtype into a DataFrame will stop "
5072-
"inferring another dtype in a future version. Cast the Index "
5073-
"explicitly before setting it into the DataFrame.",
5074-
FutureWarning,
5075-
stacklevel=find_stack_level(),
5076-
)
5077-
return arr, None
5062+
return sanitize_array(value, self.index, copy=True, allow_2d=True), None
50785063

50795064
@property
50805065
def _series(self):

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -782,20 +782,18 @@ def test_loc_setitem_ea_dtype(self):
782782
df.iloc[:, 0] = Series([11], dtype="Int64")
783783
tm.assert_frame_equal(df, expected)
784784

785-
def test_setitem_object_inferring(self):
785+
def test_setitem_index_object_dtype_not_inferring(self):
786786
# GH#56102
787787
idx = Index([Timestamp("2019-12-31")], dtype=object)
788788
df = DataFrame({"a": [1]})
789-
with tm.assert_produces_warning(FutureWarning, match="infer"):
790-
df.loc[:, "b"] = idx
791-
with tm.assert_produces_warning(FutureWarning, match="infer"):
792-
df["c"] = idx
789+
df.loc[:, "b"] = idx
790+
df["c"] = idx
793791

794792
expected = DataFrame(
795793
{
796794
"a": [1],
797-
"b": Series([Timestamp("2019-12-31")], dtype="datetime64[ns]"),
798-
"c": Series([Timestamp("2019-12-31")], dtype="datetime64[ns]"),
795+
"b": idx,
796+
"c": idx,
799797
}
800798
)
801799
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)