Skip to content

DEPR: Categorical fastpath #52472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def time_regular(self):
pd.Categorical(self.values, self.categories)

def time_fastpath(self):
pd.Categorical(self.codes, self.cat_idx, fastpath=True)
dtype = pd.CategoricalDtype(categories=self.cat_idx)
pd.Categorical._simple_new(self.codes, dtype)

def time_datetimes(self):
pd.Categorical(self.datetimes)
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,12 @@ Deprecations
- Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
- Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
- Deprecated the "fastpath" keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`)
- Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`)
- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
- Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`)
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
- Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
-


.. ---------------------------------------------------------------------------
Expand Down
31 changes: 27 additions & 4 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,15 +355,38 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi

_dtype: CategoricalDtype

@classmethod
# error: Argument 2 of "_simple_new" is incompatible with supertype
# "NDArrayBacked"; supertype defines the argument type as
# "Union[dtype[Any], ExtensionDtype]"
def _simple_new( # type: ignore[override]
cls, codes: np.ndarray, dtype: CategoricalDtype
) -> Self:
# NB: This is not _quite_ as simple as the "usual" _simple_new
codes = coerce_indexer_dtype(codes, dtype.categories)
dtype = CategoricalDtype(ordered=False).update_dtype(dtype)
return super()._simple_new(codes, dtype)

def __init__(
self,
values,
categories=None,
ordered=None,
dtype: Dtype | None = None,
fastpath: bool = False,
fastpath: bool | lib.NoDefault = lib.no_default,
copy: bool = True,
) -> None:
if fastpath is not lib.no_default:
# GH#20110
warnings.warn(
"The 'fastpath' keyword in Categorical is deprecated and will "
"be removed in a future version. Use Categorical.from_codes instead",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
fastpath = False

dtype = CategoricalDtype._from_values_or_dtype(
values, categories, ordered, dtype
)
Expand Down Expand Up @@ -626,7 +649,7 @@ def _from_inferred_categories(
dtype = CategoricalDtype(cats, ordered=False)
codes = inferred_codes

return cls(codes, dtype=dtype, fastpath=True)
return cls._simple_new(codes, dtype=dtype)

@classmethod
def from_codes(
Expand Down Expand Up @@ -693,7 +716,7 @@ def from_codes(
if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1):
raise ValueError("codes need to be between -1 and len(categories)-1")

return cls(codes, dtype=dtype, fastpath=True)
return cls._simple_new(codes, dtype=dtype)

# ------------------------------------------------------------------
# Categories/Codes/Ordered
Expand Down Expand Up @@ -805,7 +828,7 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
a (valid) instance of `CategoricalDtype`.
"""
codes = recode_for_categories(self.codes, self.categories, dtype.categories)
return type(self)(codes, dtype=dtype, fastpath=True)
return type(self)._simple_new(codes, dtype=dtype)

def set_ordered(self, value: bool) -> Self:
"""
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
)
Expand Down Expand Up @@ -323,7 +324,8 @@ def _maybe_unwrap(x):
if ignore_order:
ordered = False

return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
dtype = CategoricalDtype(categories=categories, ordered=ordered)
return Categorical._simple_new(new_codes, dtype=dtype)


def _concatenate_2d(to_concat: Sequence[np.ndarray], axis: AxisInt) -> np.ndarray:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def _from_values_or_dtype(
CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object)
>>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True)
>>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False)
>>> c = pd.Categorical([0, 1], dtype=dtype1, fastpath=True)
>>> c = pd.Categorical([0, 1], dtype=dtype1)
>>> pd.CategoricalDtype._from_values_or_dtype(
... c, ['x', 'y'], ordered=True, dtype=dtype2
... )
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def recode_for_groupby(
# return a new categorical that maps our new codes
# and categories
dtype = CategoricalDtype(categories, ordered=c.ordered)
return Categorical(codes, dtype=dtype, fastpath=True), c
return Categorical._simple_new(codes, dtype=dtype), c

# Already sorted according to c.categories; all is fine
if sort:
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/util/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from pandas._libs.hashing import hash_object_array

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCExtensionArray,
Expand Down Expand Up @@ -203,7 +204,10 @@ def hash_tuples(

# create a list-of-Categoricals
cat_vals = [
Categorical(mi.codes[level], mi.levels[level], ordered=False, fastpath=True)
Categorical._simple_new(
mi.codes[level],
CategoricalDtype(categories=mi.levels[level], ordered=False),
)
for level in range(mi.nlevels)
]

Expand Down Expand Up @@ -296,7 +300,8 @@ def _hash_ndarray(
)

codes, categories = factorize(vals, sort=False)
cat = Categorical(codes, Index(categories), ordered=False, fastpath=True)
dtype = CategoricalDtype(categories=Index(categories), ordered=False)
cat = Categorical._simple_new(codes, dtype)
return cat._hash_pandas_object(
encoding=encoding, hash_key=hash_key, categorize=False
)
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@


class TestCategoricalConstructors:
def test_fastpath_deprecated(self):
codes = np.array([1, 2, 3])
dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False)
msg = "The 'fastpath' keyword in Categorical is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
Categorical(codes, dtype=dtype, fastpath=True)

def test_categorical_from_cat_and_dtype_str_preserve_ordered(self):
# GH#49309 we should preserve orderedness in `res`
cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_na_flags_int_categories(self):
labels = np.random.randint(0, 10, 20)
labels[::5] = -1

cat = Categorical(labels, categories, fastpath=True)
cat = Categorical(labels, categories)
repr(cat)

tm.assert_numpy_array_equal(isna(cat), labels == -1)
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/arrays/categorical/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
Series,
date_range,
Expand All @@ -24,7 +25,9 @@ def test_print(self, factor):

class TestCategoricalRepr:
def test_big_print(self):
factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True)
codes = np.array([0, 1, 2, 0, 1, 2] * 100)
dtype = CategoricalDtype(categories=["a", "b", "c"])
factor = Categorical.from_codes(codes, dtype=dtype)
expected = [
"['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
"Length: 600",
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def test_constructor_invalid(self):

dtype1 = CategoricalDtype(["a", "b"], ordered=True)
dtype2 = CategoricalDtype(["x", "y"], ordered=False)
c = Categorical([0, 1], dtype=dtype1, fastpath=True)
c = Categorical([0, 1], dtype=dtype1)

@pytest.mark.parametrize(
"values, categories, ordered, dtype, expected",
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def test_constructor_map(self):
tm.assert_series_equal(result, exp)

def test_constructor_categorical(self):
cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"], fastpath=True)
cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"])
res = Series(cat)
tm.assert_categorical_equal(res.values, cat)

Expand Down