Commit 5395227

Merge branch 'master' into rename_DataFrame.applymap_to_DataFrame.map
2 parents: 7d36014 + 0e39016
30 files changed: +248, -151 lines

asv_bench/benchmarks/categoricals.py

Lines changed: 2 additions & 1 deletion

@@ -42,7 +42,8 @@ def time_regular(self):
         pd.Categorical(self.values, self.categories)
 
     def time_fastpath(self):
-        pd.Categorical(self.codes, self.cat_idx, fastpath=True)
+        dtype = pd.CategoricalDtype(categories=self.cat_idx)
+        pd.Categorical._simple_new(self.codes, dtype)
 
     def time_datetimes(self):
         pd.Categorical(self.datetimes)
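The benchmark now goes through the private ``Categorical._simple_new`` instead of the deprecated ``fastpath`` keyword. For code outside pandas, the public equivalent is ``Categorical.from_codes``; a minimal sketch with illustrative values:

    import pandas as pd

    categories = ["a", "b", "c"]
    codes = [0, 2, 1, -1]  # -1 marks a missing value

    dtype = pd.CategoricalDtype(categories=categories)
    cat = pd.Categorical.from_codes(codes, dtype=dtype)
    print(cat)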

asv_bench/benchmarks/series_methods.py

Lines changed: 3 additions & 0 deletions

@@ -385,6 +385,9 @@ def time_to_numpy_double_copy(self):
     def time_to_numpy_copy(self):
         self.ser.to_numpy(copy=True)
 
+    def time_to_numpy_float_with_nan(self):
+        self.ser.to_numpy(dtype="float64", na_value=np.nan)
+
 
 class Replace:
     param_names = ["num_to_replace"]
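The added benchmark times the conversion that the ``pandas/core/base.py`` change below optimizes. Roughly, with illustrative data standing in for the benchmark's ``self.ser``:

    import numpy as np
    import pandas as pd

    ser = pd.Series(np.random.default_rng(0).standard_normal(100_000))
    ser.iloc[::100] = np.nan  # sprinkle in some missing values

    # The call being timed: convert to a float64 ndarray, representing NA as NaN.
    arr = ser.to_numpy(dtype="float64", na_value=np.nan)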

asv_bench/benchmarks/strings.py

Lines changed: 25 additions & 27 deletions

@@ -25,33 +25,31 @@ def setup(self, dtype):
 
 
 class Construction:
-    params = ["str", "string"]
-    param_names = ["dtype"]
-
-    def setup(self, dtype):
-        self.series_arr = tm.rands_array(nchars=10, size=10**5)
-        self.frame_arr = self.series_arr.reshape((50_000, 2)).copy()
-
-        # GH37371. Testing construction of string series/frames from ExtensionArrays
-        self.series_cat_arr = Categorical(self.series_arr)
-
-    def time_series_construction(self, dtype):
-        Series(self.series_arr, dtype=dtype)
-
-    def peakmem_series_construction(self, dtype):
-        Series(self.series_arr, dtype=dtype)
-
-    def time_frame_construction(self, dtype):
-        DataFrame(self.frame_arr, dtype=dtype)
-
-    def peakmem_frame_construction(self, dtype):
-        DataFrame(self.frame_arr, dtype=dtype)
-
-    def time_cat_series_construction(self, dtype):
-        Series(self.series_cat_arr, dtype=dtype)
-
-    def peakmem_cat_series_construction(self, dtype):
-        Series(self.series_cat_arr, dtype=dtype)
+    params = (
+        ["series", "frame", "categorical_series"],
+        ["str", "string[python]", "string[pyarrow]"],
+    )
+    param_names = ["pd_type", "dtype"]
+    pd_mapping = {"series": Series, "frame": DataFrame, "categorical_series": Series}
+    dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
+
+    def setup(self, pd_type, dtype):
+        series_arr = tm.rands_array(
+            nchars=10, size=10**5, dtype=self.dtype_mapping[dtype]
+        )
+        if pd_type == "series":
+            self.arr = series_arr
+        elif pd_type == "frame":
+            self.arr = series_arr.reshape((50_000, 2)).copy()
+        elif pd_type == "categorical_series":
+            # GH37371. Testing construction of string series/frames from ExtensionArrays
+            self.arr = Categorical(series_arr)
+
+    def time_construction(self, pd_type, dtype):
+        self.pd_mapping[pd_type](self.arr, dtype=dtype)
+
+    def peakmem_construction(self, pd_type, dtype):
+        self.pd_mapping[pd_type](self.arr, dtype=dtype)
 
 
 class Methods(Dtypes):
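The consolidated ``Construction`` benchmark sweeps one constructor call over several container/dtype combinations. The equivalent direct calls look roughly like this (the sample data is illustrative, and the ``string[pyarrow]`` case assumes ``pyarrow`` is installed):

    import numpy as np
    import pandas as pd

    arr = np.array(["apple", "banana", "cherry", "date"] * 25_000, dtype=object)

    pd.Series(arr, dtype="str")                  # plain object-backed strings
    pd.Series(arr, dtype="string[python]")       # StringDtype, python storage
    pd.Series(arr, dtype="string[pyarrow]")      # StringDtype, pyarrow storage
    pd.DataFrame(arr.reshape(50_000, 2), dtype="string[python]")
    pd.Series(pd.Categorical(arr), dtype="str")  # from an ExtensionArray (GH37371)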

doc/source/development/contributing.rst

Lines changed: 5 additions & 4 deletions

@@ -47,7 +47,7 @@ that is assigned, feel free to kindly ask the current assignee if you can take i
 
 We have several :ref:`contributor community <community>` communication channels, which you are
 welcome to join, and ask questions as you figure things out. Among them are regular meetings for
-new contributors, dev meetings, a dev mailing list, and a slack for the contributor community.
+new contributors, dev meetings, a dev mailing list, and a Slack for the contributor community.
 All pandas contributors are welcome to these spaces, where they can connect with each other. Even
 maintainers who have been with us for a long time felt just like you when they started out, and
 are happy to welcome you and support you as you get to know how we work, and where things are.

@@ -308,8 +308,9 @@ default commit message will open, and you can simply save and quit this file.
 If there are merge conflicts, you need to solve those conflicts. See for
 example at https://help.github.com/articles/resolving-a-merge-conflict-using-the-command-line/
 for an explanation on how to do this.
-Once the conflicts are merged and the files where the conflicts were solved are
-added, you can run ``git commit`` to save those fixes.
+Once the conflicts are resolved, you should do:
+1. ``git add -u`` to stage any files you've updated;
+2. ``git commit`` to finish the merge.
 
 If you have uncommitted changes at the moment you want to update the branch with
 main, you will need to ``stash`` them prior to updating (see the

@@ -324,7 +325,7 @@ request by pushing to the branch on GitHub::
 Autofixing formatting errors
 ----------------------------
 
-We use several styling checks (e.g. ``black``, ``flake8``, ``isort``) which are run after
+We use several styling checks (e.g. ``black``, ``ruff``, ``isort``) which are run after
 you make a pull request.
 
 To automatically fix formatting errors on each commit you make, you can

doc/source/development/contributing_codebase.rst

Lines changed: 18 additions & 60 deletions

@@ -18,40 +18,32 @@ tools will be run to check your code for stylistic errors.
 Generating any warnings will cause the test to fail.
 Thus, good style is a requirement for submitting code to pandas.
 
-There is a tool in pandas to help contributors verify their changes before
-contributing them to the project::
+There are a couple of tools in pandas to help contributors verify their changes
+before contributing to the project
 
-    ./ci/code_checks.sh
-
-The script validates the doctests, formatting in docstrings, and
-imported modules. It is possible to run the checks independently by using the
-parameters ``docstrings``, ``code``, and ``doctests``
-(e.g. ``./ci/code_checks.sh doctests``).
+- ``./ci/code_checks.sh``: a script validates the doctests, formatting in docstrings,
+  and imported modules. It is possible to run the checks independently by using the
+  parameters ``docstrings``, ``code``, and ``doctests``
+  (e.g. ``./ci/code_checks.sh doctests``);
+- ``pre-commit``, which we go into detail on in the next section.
 
 In addition, because a lot of people use our library, it is important that we
 do not make sudden changes to the code that could have the potential to break
 a lot of user code as a result, that is, we need it to be as *backwards compatible*
 as possible to avoid mass breakages.
 
-In addition to ``./ci/code_checks.sh``, some extra checks (including static type
-checking) are run by ``pre-commit`` - see :ref:`here <contributing.pre-commit>`
-for how to run them.
-
 .. _contributing.pre-commit:
 
 Pre-commit
 ----------
 
 Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks
 like ``black``, ``ruff``,
-``isort``, and ``cpplint`` and more using `pre-commit hooks <https://pre-commit.com/>`_
+``isort``, and ``cpplint`` and more using `pre-commit hooks <https://pre-commit.com/>`_.
 Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore,
 it is helpful to run the check yourself before submitting code. This
-can be done by installing ``pre-commit``::
-
-    pip install pre-commit
-
-and then running::
+can be done by installing ``pre-commit`` (which should already have happened if you followed the instructions
+in :ref:`Setting up your development environment <contributing_environment>`) and then running::
 
     pre-commit install
 

@@ -63,17 +55,17 @@ remain up-to-date with our code checks as they change.
 Note that if needed, you can skip these checks with ``git commit --no-verify``.
 
 If you don't want to use ``pre-commit`` as part of your workflow, you can still use it
-to run its checks with::
+to run its checks with one of the following::
 
     pre-commit run --files <files you have modified>
+    pre-commit run --from-ref=upstream/main --to-ref=HEAD --all-files
 
 without needing to have done ``pre-commit install`` beforehand.
 
-If you want to run checks on all recently committed files on upstream/main you can use::
-
-    pre-commit run --from-ref=upstream/main --to-ref=HEAD --all-files
+Finally, we also have some slow pre-commit checks, which don't run on each commit
+but which do run during continuous integration. You can trigger them manually with::
 
-without needing to have done ``pre-commit install`` beforehand.
+    pre-commit run --hook-stage manual --all-files
 
 .. note::
 

@@ -170,43 +162,9 @@ pandas strongly encourages the use of :pep:`484` style type hints. New developme
 Style guidelines
 ~~~~~~~~~~~~~~~~
 
-Type imports should follow the ``from typing import ...`` convention. Some types do not need to be imported since :pep:`585` some builtin constructs, such as ``list`` and ``tuple``, can directly be used for type annotations. So rather than
-
-.. code-block:: python
-
-    import typing
-
-    primes: typing.List[int] = []
-
-You should write
-
-.. code-block:: python
-
-    primes: list[int] = []
-
-``Optional`` should be avoided in favor of the shorter ``| None``, so instead of
-
-.. code-block:: python
-
-    from typing import Union
-
-    maybe_primes: list[Union[int, None]] = []
-
-or
-
-.. code-block:: python
-
-    from typing import Optional
-
-    maybe_primes: list[Optional[int]] = []
-
-You should write
-
-.. code-block:: python
-
-    from __future__ import annotations  # noqa: F404
-
-    maybe_primes: list[int | None] = []
+Type imports should follow the ``from typing import ...`` convention.
+Your code may be automatically re-written to use some modern constructs (e.g. using the built-in ``list`` instead of ``typing.List``)
+by the :ref:`pre-commit checks <contributing.pre-commit>`.
 
 In some cases in the code base classes may define class variables that shadow builtins. This causes an issue as described in `Mypy 1775 <https://github.com/python/mypy/issues/1775#issuecomment-310969854>`_. The defensive solution here is to create an unambiguous alias of the builtin and use that without your annotation. For example, if you come across a definition like
 
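For reference, the modern spellings that those automatic rewrites target look like this (a minimal sketch, not taken from the pandas codebase):

    from __future__ import annotations  # lets the | syntax work on older Python versions

    primes: list[int] = []               # builtin generic instead of typing.List
    maybe_primes: list[int | None] = []  # X | None instead of Optional[X] or Union[X, None]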

doc/source/development/contributing_environment.rst

Lines changed: 5 additions & 5 deletions

@@ -21,7 +21,7 @@ locally before pushing your changes. It's recommended to also install the :ref:`
 Step 1: install a C compiler
 ----------------------------
 
-How to do this will depend on your platform. If you choose to use ``Docker``
+How to do this will depend on your platform. If you choose to use ``Docker`` or ``GitPod``
 in the next step, then you can skip this step.
 
 **Windows**

@@ -213,6 +213,10 @@ You can now run::
    python setup.py build_ext -j 4
    python -m pip install -e . --no-build-isolation --no-use-pep517
 
+.. note::
+   You will need to repeat this step each time the C extensions change, for example
+   if you modified any file in ``pandas/_libs`` or if you did a fetch and merge from ``upstream/main``.
+
 At this point you should be able to import pandas from your locally built version::
 
    $ python

@@ -222,7 +226,3 @@ At this point you should be able to import pandas from your locally built versio
 
 This will create the new environment, and not touch any of your existing environments,
 nor any existing Python installation.
-
-.. note::
-   You will need to repeat this step each time the C extensions change, for example
-   if you modified any file in ``pandas/_libs`` or if you did a fetch and merge from ``upstream/main``.

doc/source/user_guide/style.ipynb

Lines changed: 1 addition & 1 deletion

@@ -2131,4 +2131,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 1
-}
+}

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions

@@ -120,6 +120,7 @@ The following functions gained a new keyword ``dtype_backend`` (:issue:`36712`)
 * :func:`read_sql`
 * :func:`read_sql_query`
 * :func:`read_sql_table`
+* :func:`read_parquet`
 * :func:`read_orc`
 * :func:`read_feather`
 * :func:`read_spss`
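With this addition, ``read_parquet`` accepts the same keyword as the other readers listed; a short sketch (the file path is a placeholder):

    import pandas as pd

    # dtype_backend selects the nullable-dtype implementation used for the result.
    df_numpy = pd.read_parquet("data.parquet", dtype_backend="numpy_nullable")
    df_arrow = pd.read_parquet("data.parquet", dtype_backend="pyarrow")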

doc/source/whatsnew/v2.1.0.rst

Lines changed: 3 additions & 0 deletions

@@ -169,6 +169,7 @@ Deprecations
 - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
 - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
 - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
+- Deprecated the "fastpath" keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`)
 - Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`)
 - Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
 - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`)

@@ -198,6 +199,7 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` when ``verify_integrity=True`` (:issue:`51873`)
 - Performance improvement in :func:`factorize` for object columns not containing strings (:issue:`51921`)
 - Performance improvement in :class:`Series` reductions (:issue:`52341`)
+- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
 -
 
 .. ---------------------------------------------------------------------------

@@ -298,6 +300,7 @@ Groupby/resample/rolling
   or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
   the function operated on the whole index rather than each element of the index. (:issue:`51979`)
 - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`)
+- Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`)
 -
 
 Reshaping
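As an example of one of the deprecations above, logical operators between a pandas object and a bare sequence should now go through an explicit wrapper; a hedged sketch with illustrative data:

    import numpy as np
    import pandas as pd

    ser = pd.Series([True, False, True])

    # Deprecated in 2.1: ser | [False, True, True]
    # Wrap the sequence first instead:
    result = ser | pd.Series([False, True, True])
    # or: ser | np.array([False, True, True])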

pandas/core/arrays/categorical.py

Lines changed: 27 additions & 4 deletions

@@ -355,15 +355,38 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
 
     _dtype: CategoricalDtype
 
+    @classmethod
+    # error: Argument 2 of "_simple_new" is incompatible with supertype
+    # "NDArrayBacked"; supertype defines the argument type as
+    # "Union[dtype[Any], ExtensionDtype]"
+    def _simple_new(  # type: ignore[override]
+        cls, codes: np.ndarray, dtype: CategoricalDtype
+    ) -> Self:
+        # NB: This is not _quite_ as simple as the "usual" _simple_new
+        codes = coerce_indexer_dtype(codes, dtype.categories)
+        dtype = CategoricalDtype(ordered=False).update_dtype(dtype)
+        return super()._simple_new(codes, dtype)
+
     def __init__(
         self,
         values,
         categories=None,
         ordered=None,
         dtype: Dtype | None = None,
-        fastpath: bool = False,
+        fastpath: bool | lib.NoDefault = lib.no_default,
         copy: bool = True,
     ) -> None:
+        if fastpath is not lib.no_default:
+            # GH#20110
+            warnings.warn(
+                "The 'fastpath' keyword in Categorical is deprecated and will "
+                "be removed in a future version. Use Categorical.from_codes instead",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            fastpath = False
+
         dtype = CategoricalDtype._from_values_or_dtype(
             values, categories, ordered, dtype
         )

@@ -626,7 +649,7 @@ def _from_inferred_categories(
         dtype = CategoricalDtype(cats, ordered=False)
         codes = inferred_codes
 
-        return cls(codes, dtype=dtype, fastpath=True)
+        return cls._simple_new(codes, dtype=dtype)
 
     @classmethod
     def from_codes(

@@ -693,7 +716,7 @@ def from_codes(
         if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1):
             raise ValueError("codes need to be between -1 and len(categories)-1")
 
-        return cls(codes, dtype=dtype, fastpath=True)
+        return cls._simple_new(codes, dtype=dtype)
 
     # ------------------------------------------------------------------
     # Categories/Codes/Ordered

@@ -805,7 +828,7 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
         a (valid) instance of `CategoricalDtype`.
         """
         codes = recode_for_categories(self.codes, self.categories, dtype.categories)
-        return type(self)(codes, dtype=dtype, fastpath=True)
+        return type(self)._simple_new(codes, dtype=dtype)
 
     def set_ordered(self, value: bool) -> Self:
         """

pandas/core/base.py

Lines changed: 9 additions & 8 deletions

@@ -573,25 +573,26 @@ def to_numpy(
                 f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
             )
 
-        if na_value is not lib.no_default:
-            values = self._values
+        fillna = (
+            na_value is not lib.no_default
+            # no need to fillna with np.nan if we already have a float dtype
+            and not (na_value is np.nan and np.issubdtype(self.dtype, np.floating))
+        )
+
+        values = self._values
+        if fillna:
             if not can_hold_element(values, na_value):
                 # if we can't hold the na_value asarray either makes a copy or we
                 # error before modifying values. The asarray later on thus won't make
                 # another copy
                 values = np.asarray(values, dtype=dtype)
             else:
                 values = values.copy()
-
             values[np.asanyarray(self.isna())] = na_value
-        else:
-            values = self._values
 
         result = np.asarray(values, dtype=dtype)
 
-        if (copy and na_value is lib.no_default) or (
-            not copy and using_copy_on_write()
-        ):
+        if (copy and not fillna) or (not copy and using_copy_on_write()):
             if np.shares_memory(self._values[:2], result[:2]):
                 # Take slices to improve performance of check
                 if using_copy_on_write() and not copy:
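The user-visible effect is that ``to_numpy`` can skip the mask-and-fill pass when the data is already float and ``na_value`` is ``np.nan``, while a non-float source still takes the fill path; a hedged sketch with illustrative data:

    import numpy as np
    import pandas as pd

    ser = pd.Series([1.5, np.nan, 3.0])

    # Fast path from this change: dtype is already float and na_value is np.nan,
    # so no fill pass is needed before the ndarray conversion.
    print(ser.to_numpy(dtype="float64", na_value=np.nan))    # [1.5 nan 3. ]

    # A nullable-integer source still takes the fill path: NA is replaced explicitly.
    ser2 = pd.Series([1, None, 3], dtype="Int64")
    print(ser2.to_numpy(dtype="float64", na_value=np.nan))   # [ 1. nan  3.]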
