Skip to content

TYP: DataFrame.(index|columns) and Series.index #31126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,6 @@ Other API changes
- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`).
- Added ``<NA>`` to the list of default NA values for :meth:`read_csv` (:issue:`30821`)


.. _whatsnew_100.api.documentation:

Documentation Improvements
Expand Down
7 changes: 6 additions & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ Other API changes
- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
-
-

Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- :meth:`DataFrame.swaplevel` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`.
Previously an ``AttributeError`` was raised (:issue:`31126`)


.. ---------------------------------------------------------------------------

Expand Down
26 changes: 25 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from pandas._config import get_option

from pandas._libs import algos as libalgos, lib
from pandas._libs import algos as libalgos, lib, properties
from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Level, Renamer
from pandas.compat import PY37
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -91,8 +91,10 @@
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCDatetimeIndex,
ABCIndexClass,
ABCMultiIndex,
ABCPeriodIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import isna, notna
Expand Down Expand Up @@ -394,6 +396,7 @@ class DataFrame(NDFrame):
2 7 8 9
"""

_internal_names_set = {"columns", "index"} | NDFrame._internal_names_set
_typ = "dataframe"

@property
Expand Down Expand Up @@ -5290,9 +5293,15 @@ def swaplevel(self, i=-2, j=-1, axis=0) -> "DataFrame":
result = self.copy()

axis = self._get_axis_number(axis)

if not isinstance(result._get_axis(axis), ABCMultiIndex): # pragma: no cover
raise TypeError("Can only swap levels on a hierarchical axis.")

if axis == 0:
assert isinstance(result.index, ABCMultiIndex)
result.index = result.index.swaplevel(i, j)
else:
assert isinstance(result.columns, ABCMultiIndex)
result.columns = result.columns.swaplevel(i, j)
return result

Expand All @@ -5319,8 +5328,10 @@ def reorder_levels(self, order, axis=0) -> "DataFrame":
result = self.copy()

if axis == 0:
assert isinstance(result.index, ABCMultiIndex)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isn't this a breaking change?

this PR

>>> pd.DataFrame().swaplevel()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Users\simon\pandas\pandas\core\frame.py", line 5270, in swaplevel
    assert isinstance(result.index, ABCMultiIndex)
AssertionError
>>>

on master

>>> pd.DataFrame().swaplevel()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Users\simon\pandas\pandas\core\frame.py", line 5268, in swaplevel
    result.index = result.index.swaplevel(i, j)
AttributeError: 'Index' object has no attribute 'swaplevel'
>>>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I've made it a TypeError, to mirror reorder_levels. I also added a Whatsnew note.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, is this an API change? We're reaching 1.0, so in principle we should be wary of changing the API. Should I raise an AttributeError instead of TypeError to maintain compatibility (though that is, strictly speaking, the wrong error to raise)?

result.index = result.index.reorder_levels(order)
else:
assert isinstance(result.columns, ABCMultiIndex)
result.columns = result.columns.reorder_levels(order)
return result

Expand Down Expand Up @@ -8344,8 +8355,10 @@ def to_timestamp(self, freq=None, how="start", axis=0, copy=True) -> "DataFrame"

axis = self._get_axis_number(axis)
if axis == 0:
assert isinstance(self.index, (ABCDatetimeIndex, ABCPeriodIndex))
new_data.set_axis(1, self.index.to_timestamp(freq=freq, how=how))
elif axis == 1:
assert isinstance(self.columns, (ABCDatetimeIndex, ABCPeriodIndex))
new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))
else: # pragma: no cover
raise AssertionError(f"Axis must be 0 or 1. Got {axis}")
Expand Down Expand Up @@ -8378,8 +8391,10 @@ def to_period(self, freq=None, axis=0, copy=True) -> "DataFrame":

axis = self._get_axis_number(axis)
if axis == 0:
assert isinstance(self.index, ABCDatetimeIndex)
new_data.set_axis(1, self.index.to_period(freq=freq))
elif axis == 1:
assert isinstance(self.columns, ABCDatetimeIndex)
new_data.set_axis(0, self.columns.to_period(freq=freq))
else: # pragma: no cover
raise AssertionError(f"Axis must be 0 or 1. Got {axis}")
Expand Down Expand Up @@ -8482,6 +8497,15 @@ def isin(self, values) -> "DataFrame":
self.columns,
)

# ----------------------------------------------------------------------
# Add index and columns
index: "Index" = properties.AxisProperty(
axis=1, doc="The index (row labels) of the DataFrame."
)
columns: "Index" = properties.AxisProperty(
axis=0, doc="The column labels of the DataFrame."
)

# ----------------------------------------------------------------------
# Add plotting methods to DataFrame
plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
Expand Down
15 changes: 2 additions & 13 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

from pandas._config import config

from pandas._libs import Timestamp, iNaT, lib, properties
from pandas._libs import Timestamp, iNaT, lib
from pandas._typing import (
Axis,
Dtype,
Expand Down Expand Up @@ -333,18 +333,6 @@ def _setup_axes(cls, axes: List[str], docs: Dict[str, str]) -> None:
cls._info_axis_number = info_axis
cls._info_axis_name = axes[info_axis]

# setup the actual axis
def set_axis(a, i):
setattr(cls, a, properties.AxisProperty(i, docs.get(a, a)))
cls._internal_names_set.add(a)

if axes_are_reversed:
for i, a in cls._AXIS_NAMES.items():
set_axis(a, 1 - i)
else:
for i, a in cls._AXIS_NAMES.items():
set_axis(a, i)

def _construct_axes_dict(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
Expand Down Expand Up @@ -5083,6 +5071,7 @@ def __finalize__(
self.attrs[name] = other.attrs[name]
# For subclasses using _metadata.
for name in self._metadata:
assert isinstance(name, str)
object.__setattr__(self, name, getattr(other, name, None))
return self

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,13 @@ def pivot_table(
table = agged.unstack(to_unstack)

if not dropna:
if table.index.nlevels > 1:
if isinstance(table.index, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.index.levels), names=table.index.names
)
table = table.reindex(m, axis=0)

if table.columns.nlevels > 1:
if isinstance(table.columns, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.columns.levels), names=table.columns.names
)
Expand Down Expand Up @@ -373,7 +373,7 @@ def _generate_marginal_results_without_values(
):
if len(cols) > 0:
# need to "interleave" the margins
margin_keys = []
margin_keys: Union[List, Index] = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be ArrayLike? (not sure it matters, but can update in a follow-on if this works)

Copy link
Contributor Author

@topper-123 topper-123 Jan 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would not work, because Union[List, Index] is the precise type of this.


def _all_key():
if len(cols) == 1:
Expand Down
15 changes: 14 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from pandas._config import get_option

from pandas._libs import index as libindex, lib, reshape, tslibs
from pandas._libs import index as libindex, lib, properties, reshape, tslibs
from pandas._typing import Label
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution
Expand All @@ -46,6 +46,8 @@
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCDatetimeIndex,
ABCMultiIndex,
ABCPeriodIndex,
ABCSeries,
ABCSparseArray,
)
Expand Down Expand Up @@ -176,6 +178,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):

_name: Optional[Hashable]
_metadata: List[str] = ["name"]
_internal_names_set = {"index"} | generic.NDFrame._internal_names_set
_accessors = {"dt", "cat", "str", "sparse"}
_deprecations = (
base.IndexOpsMixin._deprecations
Expand Down Expand Up @@ -3347,6 +3350,7 @@ def swaplevel(self, i=-2, j=-1, copy=True) -> "Series":
Series
Series with levels swapped in MultiIndex.
"""
assert isinstance(self.index, ABCMultiIndex)
new_index = self.index.swaplevel(i, j)
return self._constructor(self._values, index=new_index, copy=copy).__finalize__(
self
Expand All @@ -3371,6 +3375,7 @@ def reorder_levels(self, order) -> "Series":
raise Exception("Can only reorder levels on a hierarchical axis.")

result = self.copy()
assert isinstance(result.index, ABCMultiIndex)
result.index = result.index.reorder_levels(order)
return result

Expand Down Expand Up @@ -4448,6 +4453,7 @@ def to_timestamp(self, freq=None, how="start", copy=True) -> "Series":
if copy:
new_values = new_values.copy()

assert isinstance(self.index, (ABCDatetimeIndex, ABCPeriodIndex))
new_index = self.index.to_timestamp(freq=freq, how=how)
return self._constructor(new_values, index=new_index).__finalize__(self)

Expand All @@ -4472,9 +4478,16 @@ def to_period(self, freq=None, copy=True) -> "Series":
if copy:
new_values = new_values.copy()

assert isinstance(self.index, ABCDatetimeIndex)
new_index = self.index.to_period(freq=freq)
return self._constructor(new_values, index=new_index).__finalize__(self)

# ----------------------------------------------------------------------
# Add index
index: "Index" = properties.AxisProperty(
axis=0, doc="The index (axis labels) of the Series."
)

# ----------------------------------------------------------------------
# Accessor Methods
# ----------------------------------------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,13 @@
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCDatetimeIndex,
ABCIndexClass,
ABCMultiIndex,
ABCPeriodIndex,
ABCSeries,
ABCSparseArray,
ABCTimedeltaIndex,
)
from pandas.core.dtypes.missing import isna, notna

Expand Down Expand Up @@ -295,6 +298,9 @@ def _get_footer(self) -> str:
footer = ""

if getattr(self.series.index, "freq", None) is not None:
assert isinstance(
self.series.index, (ABCDatetimeIndex, ABCPeriodIndex, ABCTimedeltaIndex)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't we have an ABCDatetimelike?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, and I only found two instances of this pattern.

)
footer += "Freq: {freq}".format(freq=self.series.index.freqstr)

if self.name is not False and name is not None:
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,10 @@ def test_swaplevel(self):
exp = self.frame.swaplevel("first", "second").T
tm.assert_frame_equal(swapped, exp)

msg = "Can only swap levels on a hierarchical axis."
with pytest.raises(TypeError, match=msg):
DataFrame(range(3)).swaplevel()

def test_reorder_levels(self):
result = self.ymd.reorder_levels(["month", "day", "year"])
expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2)
Expand Down