Skip to content

REF: dispatch Series.rank to EA #45037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,6 @@ def _get_hashtable_algo(values: np.ndarray):


def _get_values_for_rank(values: ArrayLike) -> np.ndarray:
if is_categorical_dtype(values):
values = cast("Categorical", values)._values_for_rank()

values = _ensure_data(values)
if values.dtype.kind in ["i", "u", "f"]:
Expand Down Expand Up @@ -993,13 +991,13 @@ def rank(
na_option: str = "keep",
ascending: bool = True,
pct: bool = False,
) -> np.ndarray:
) -> npt.NDArray[np.float64]:
"""
Rank the values along a given axis.

Parameters
----------
values : array-like
values : np.ndarray or ExtensionArray
Array whose values will be ranked. The number of dimensions in this
array must not exceed 2.
axis : int, default 0
Expand Down
27 changes: 27 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
from pandas.core.algorithms import (
factorize_array,
isin,
rank,
unique,
)
from pandas.core.array_algos.quantile import quantile_with_mask
Expand Down Expand Up @@ -1496,6 +1497,32 @@ def _fill_mask_inplace(
self[mask] = new_values[mask]
return

def _rank(
    self,
    *,
    axis: int = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
):
    """
    Rank the values of this array by delegating to ``rank``.

    All keywords are forwarded unchanged; see Series.rank.__doc__ for
    their meaning.

    Raises
    ------
    NotImplementedError
        If ``axis`` is anything other than 0.
    """
    if axis != 0:
        raise NotImplementedError

    # TODO: we only have tests that get here with dt64 and td64
    # TODO: all tests that get here use the defaults for all the kwds
    ranks = rank(
        self,
        pct=pct,
        ascending=ascending,
        na_option=na_option,
        method=method,
        axis=axis,
    )
    return ranks

@classmethod
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
"""
Expand Down
24 changes: 24 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1842,6 +1842,30 @@ def sort_values(
codes = self._codes[sorted_idx]
return self._from_backing_data(codes)

def _rank(
    self,
    *,
    axis: int = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
):
    """
    Rank this array via the values returned by ``_values_for_rank``.

    All keywords are forwarded unchanged to ``algorithms.rank``; see
    Series.rank.__doc__ for their meaning.

    Raises
    ------
    NotImplementedError
        If ``axis`` is anything other than 0.
    """
    if axis != 0:
        raise NotImplementedError

    # Rank the representation from _values_for_rank rather than ``self``.
    return algorithms.rank(
        self._values_for_rank(),
        pct=pct,
        ascending=ascending,
        na_option=na_option,
        method=method,
        axis=axis,
    )

def _values_for_rank(self):
"""
For correctly ranking ordered categorical data. See GH#15420
Expand Down
39 changes: 26 additions & 13 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8494,19 +8494,32 @@ def rank(
raise ValueError(msg)

def ranker(data):
ranks = algos.rank(
data.values,
axis=axis,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
)
# error: Argument 1 to "NDFrame" has incompatible type "ndarray"; expected
# "Union[ArrayManager, BlockManager]"
ranks_obj = self._constructor(
ranks, **data._construct_axes_dict() # type: ignore[arg-type]
)
if data.ndim == 2:
# i.e. DataFrame, we cast to ndarray
values = data.values
else:
# i.e. Series, can dispatch to EA
values = data._values

if isinstance(values, ExtensionArray):
ranks = values._rank(
axis=axis,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
)
else:
ranks = algos.rank(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you not just do this indirection in algos?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

either way i guess

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i take it back, doing it in algos causes a RecursionError

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I see what you did for mode, so maybe this is OK.

values,
axis=axis,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
)

ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
return ranks_obj.__finalize__(self, method="rank")

# if numeric_only is None, and we can't get anything, we try with
Expand Down