pandas-dev · mroeschke · Apr 6, 2023 · Apr 5, 2023 · Apr 6, 2023 · Apr 6, 2023
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
@@ -42,7 +42,8 @@ def time_regular(self):
         pd.Categorical(self.values, self.categories)
 
     def time_fastpath(self):
-        pd.Categorical(self.codes, self.cat_idx, fastpath=True)
+        dtype = pd.CategoricalDtype(categories=self.cat_idx)
+        pd.Categorical._simple_new(self.codes, dtype)
 
     def time_datetimes(self):
         pd.Categorical(self.datetimes)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -167,12 +167,12 @@ Deprecations
 - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
 - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
 - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
+- Deprecated the "fastpath" keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`)
 - Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`)
 - Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
 - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`)
 - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
 - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
--
 
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -355,15 +355,38 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
 
     _dtype: CategoricalDtype
 
+    @classmethod
+    # error: Argument 2 of "_simple_new" is incompatible with supertype
+    # "NDArrayBacked"; supertype defines the argument type as
+    # "Union[dtype[Any], ExtensionDtype]"
+    def _simple_new(  # type: ignore[override]
+        cls, codes: np.ndarray, dtype: CategoricalDtype
+    ) -> Self:
+        # NB: This is not _quite_ as simple as the "usual" _simple_new
+        codes = coerce_indexer_dtype(codes, dtype.categories)
+        dtype = CategoricalDtype(ordered=False).update_dtype(dtype)
+        return super()._simple_new(codes, dtype)
+
     def __init__(
         self,
         values,
         categories=None,
         ordered=None,
         dtype: Dtype | None = None,
-        fastpath: bool = False,
+        fastpath: bool | lib.NoDefault = lib.no_default,
         copy: bool = True,
     ) -> None:
+        if fastpath is not lib.no_default:
+            # GH#20110
+            warnings.warn(
+                "The 'fastpath' keyword in Categorical is deprecated and will "
+                "be removed in a future version. Use Categorical.from_codes instead",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            fastpath = False
+
         dtype = CategoricalDtype._from_values_or_dtype(
             values, categories, ordered, dtype
         )
@@ -626,7 +649,7 @@ def _from_inferred_categories(
             dtype = CategoricalDtype(cats, ordered=False)
             codes = inferred_codes
 
-        return cls(codes, dtype=dtype, fastpath=True)
+        return cls._simple_new(codes, dtype=dtype)
 
     @classmethod
     def from_codes(
@@ -693,7 +716,7 @@ def from_codes(
         if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1):
             raise ValueError("codes need to be between -1 and len(categories)-1")
 
-        return cls(codes, dtype=dtype, fastpath=True)
+        return cls._simple_new(codes, dtype=dtype)
 
     # ------------------------------------------------------------------
     # Categories/Codes/Ordered
@@ -805,7 +828,7 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
         a (valid) instance of `CategoricalDtype`.
         """
         codes = recode_for_categories(self.codes, self.categories, dtype.categories)
-        return type(self)(codes, dtype=dtype, fastpath=True)
+        return type(self)._simple_new(codes, dtype=dtype)
 
     def set_ordered(self, value: bool) -> Self:
         """

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -20,6 +20,7 @@
 )
 from pandas.core.dtypes.common import is_dtype_equal
 from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
 )
@@ -323,7 +324,8 @@ def _maybe_unwrap(x):
     if ignore_order:
         ordered = False
 
-    return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
+    dtype = CategoricalDtype(categories=categories, ordered=ordered)
+    return Categorical._simple_new(new_codes, dtype=dtype)
 
 
 def _concatenate_2d(to_concat: Sequence[np.ndarray], axis: AxisInt) -> np.ndarray:

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -260,7 +260,7 @@ def _from_values_or_dtype(
         CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object)
         >>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True)
         >>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False)
-        >>> c = pd.Categorical([0, 1], dtype=dtype1, fastpath=True)
+        >>> c = pd.Categorical([0, 1], dtype=dtype1)
         >>> pd.CategoricalDtype._from_values_or_dtype(
         ...     c, ['x', 'y'], ordered=True, dtype=dtype2
         ... )

@@ -63,7 +63,7 @@ def recode_for_groupby(
         # return a new categorical that maps our new codes
         # and categories
         dtype = CategoricalDtype(categories, ordered=c.ordered)
-        return Categorical(codes, dtype=dtype, fastpath=True), c
+        return Categorical._simple_new(codes, dtype=dtype), c
 
     # Already sorted according to c.categories; all is fine
     if sort:

diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
@@ -16,6 +16,7 @@
 from pandas._libs.hashing import hash_object_array
 
 from pandas.core.dtypes.common import is_list_like
+from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCExtensionArray,
@@ -203,7 +204,10 @@ def hash_tuples(
 
     # create a list-of-Categoricals
     cat_vals = [
-        Categorical(mi.codes[level], mi.levels[level], ordered=False, fastpath=True)
+        Categorical._simple_new(
+            mi.codes[level],
+            CategoricalDtype(categories=mi.levels[level], ordered=False),
+        )
         for level in range(mi.nlevels)
     ]
 
@@ -296,7 +300,8 @@ def _hash_ndarray(
             )
 
             codes, categories = factorize(vals, sort=False)
-            cat = Categorical(codes, Index(categories), ordered=False, fastpath=True)
+            dtype = CategoricalDtype(categories=Index(categories), ordered=False)
+            cat = Categorical._simple_new(codes, dtype)
             return cat._hash_pandas_object(
                 encoding=encoding, hash_key=hash_key, categorize=False
             )

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -32,6 +32,13 @@
 
 
 class TestCategoricalConstructors:
+    def test_fastpath_deprecated(self):
+        codes = np.array([1, 2, 3])
+        dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False)
+        msg = "The 'fastpath' keyword in Categorical is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            Categorical(codes, dtype=dtype, fastpath=True)
+
     def test_categorical_from_cat_and_dtype_str_preserve_ordered(self):
         # GH#49309 we should preserve orderedness in `res`
         cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True)

diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py
@@ -31,7 +31,7 @@ def test_na_flags_int_categories(self):
         labels = np.random.randint(0, 10, 20)
         labels[::5] = -1
 
-        cat = Categorical(labels, categories, fastpath=True)
+        cat = Categorical(labels, categories)
         repr(cat)
 
         tm.assert_numpy_array_equal(isna(cat), labels == -1)

diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -2,6 +2,7 @@
 
 from pandas import (
     Categorical,
+    CategoricalDtype,
     CategoricalIndex,
     Series,
     date_range,
@@ -24,7 +25,9 @@ def test_print(self, factor):
 
 class TestCategoricalRepr:
     def test_big_print(self):
-        factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True)
+        codes = np.array([0, 1, 2, 0, 1, 2] * 100)
+        dtype = CategoricalDtype(categories=["a", "b", "c"])
+        factor = Categorical.from_codes(codes, dtype=dtype)
         expected = [
             "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
             "Length: 600",

diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -123,7 +123,7 @@ def test_constructor_invalid(self):
 
     dtype1 = CategoricalDtype(["a", "b"], ordered=True)
     dtype2 = CategoricalDtype(["x", "y"], ordered=False)
-    c = Categorical([0, 1], dtype=dtype1, fastpath=True)
+    c = Categorical([0, 1], dtype=dtype1)
 
     @pytest.mark.parametrize(
         "values, categories, ordered, dtype, expected",

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -385,7 +385,7 @@ def test_constructor_map(self):
         tm.assert_series_equal(result, exp)
 
     def test_constructor_categorical(self):
-        cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"], fastpath=True)
+        cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"])
         res = Series(cat)
         tm.assert_categorical_equal(res.values, cat)