Skip to content

BUG: DataFrame(dict_of_series) raising depending on order of dict #45018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ Other Deprecations
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)
- Deprecated :meth:`DatetimeIndex.union_many`, use :meth:`DatetimeIndex.union` instead (:issue:`44091`)
- Deprecated :meth:`.Groupby.pad` in favor of :meth:`.Groupby.ffill` (:issue:`33396`)
- Deprecated :meth:`.Groupby.backfill` in favor of :meth:`.Groupby.bfill` (:issue:`33396`)
- Deprecated :meth:`.Resample.pad` in favor of :meth:`.Resample.ffill` (:issue:`33396`)
Expand Down Expand Up @@ -708,6 +709,7 @@ Datetimelike
- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
- Bug in :class:`DateOffset`` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`)
- Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`)
- Bug in :class:`DataFrame` construction from dict of :class:`Series` with mismatched index dtypes sometimes raising depending on the ordering of the passed dict (:issue:`44091`)
-

Timedelta
Expand Down
43 changes: 36 additions & 7 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
)
from pandas.errors import InvalidIndexError

from pandas.core.dtypes.common import is_dtype_equal

from pandas.core.indexes.base import (
Index,
_new_Index,
Expand Down Expand Up @@ -213,14 +215,41 @@ def conv(i):

if kind == "special":
result = indexes[0]
first = result

dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
dti_tzs = [x for x in dtis if x.tz is not None]
if len(dti_tzs) not in [0, len(dtis)]:
# TODO: this behavior is not tested (so may not be desired),
# but is kept in order to keep behavior the same when
# deprecating union_many
# test_frame_from_dict_with_mixed_indexes
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

if len(dtis) == len(indexes):
sort = True
if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes):
# i.e. timezones mismatch
# TODO(2.0): once deprecation is enforced, this union will
# cast to UTC automatically.
indexes = [x.tz_convert("UTC") for x in indexes]

result = indexes[0]

elif len(dtis) > 1:
# If we have mixed timezones, our casting behavior may depend on
# the order of indexes, which we don't want.
sort = False

# TODO: what about Categorical[dt64]?
# test_frame_from_dict_with_mixed_indexes
indexes = [x.astype(object, copy=False) for x in indexes]
result = indexes[0]

for other in indexes[1:]:
result = result.union(other, sort=None if sort else False)
return result

if hasattr(result, "union_many"):
# DatetimeIndex
return result.union_many(indexes[1:])
else:
for other in indexes[1:]:
result = result.union(other, sort=None if sort else False)
return result
elif kind == "array":
index = indexes[0]
if not all(index.equals(other) for other in indexes[1:]):
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,13 @@ def union_many(self, others):
"""
A bit of a hack to accelerate unioning a collection of indexes.
"""
warnings.warn(
"DatetimeIndex.union_many is deprecated and will be removed in "
"a future version. Use obj.union instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

this = self

for other in others:
Expand Down
46 changes: 45 additions & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2666,6 +2666,50 @@ def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):
exp = pd.period_range("1/1/1980", "1/1/2012", freq="M")
tm.assert_index_equal(df.index, exp)

def test_frame_from_dict_with_mixed_tzaware_indexes(self):
# GH#44091
dti = date_range("2016-01-01", periods=3)

ser1 = Series(range(3), index=dti)
ser2 = Series(range(3), index=dti.tz_localize("UTC"))
ser3 = Series(range(3), index=dti.tz_localize("US/Central"))
ser4 = Series(range(3))

# no tz-naive, but we do have mixed tzs and a non-DTI
df1 = DataFrame({"A": ser2, "B": ser3, "C": ser4})
exp_index = Index(
list(ser2.index) + list(ser3.index) + list(ser4.index), dtype=object
)
tm.assert_index_equal(df1.index, exp_index)

df2 = DataFrame({"A": ser2, "C": ser4, "B": ser3})
exp_index3 = Index(
list(ser2.index) + list(ser4.index) + list(ser3.index), dtype=object
)
tm.assert_index_equal(df2.index, exp_index3)

df3 = DataFrame({"B": ser3, "A": ser2, "C": ser4})
exp_index3 = Index(
list(ser3.index) + list(ser2.index) + list(ser4.index), dtype=object
)
tm.assert_index_equal(df3.index, exp_index3)

df4 = DataFrame({"C": ser4, "B": ser3, "A": ser2})
exp_index4 = Index(
list(ser4.index) + list(ser3.index) + list(ser2.index), dtype=object
)
tm.assert_index_equal(df4.index, exp_index4)

# TODO: not clear if these raising is desired (no extant tests),
# but this is de facto behavior 2021-12-22
msg = "Cannot join tz-naive with tz-aware DatetimeIndex"
with pytest.raises(TypeError, match=msg):
DataFrame({"A": ser2, "B": ser3, "C": ser4, "D": ser1})
with pytest.raises(TypeError, match=msg):
DataFrame({"A": ser2, "B": ser3, "D": ser1})
with pytest.raises(TypeError, match=msg):
DataFrame({"D": ser1, "A": ser2, "B": ser3})


class TestDataFrameConstructorWithDtypeCoercion:
def test_floating_values_integer_dtype(self):
Expand Down Expand Up @@ -2911,7 +2955,7 @@ def test_construction_from_ndarray_with_eadtype_mismatched_columns(self):
DataFrame(arr2, columns=["foo", "bar"])


def get1(obj):
def get1(obj): # TODO: make a helper in tm?
if isinstance(obj, Series):
return obj.iloc[0]
else:
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)


def test_union_many_deprecated():
dti = date_range("2016-01-01", periods=3)

with tm.assert_produces_warning(FutureWarning):
dti.union_many([dti, dti])


class TestDatetimeIndexSetOps:
tz = [
None,
Expand Down