Skip to content

Commit 84c01da

Browse files
committed
Merge remote-tracking branch 'upstream/main' into sequence_to_td64ns
2 parents d93d5b2 + 4651ddb commit 84c01da

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+526
-356
lines changed

.github/workflows/unit-tests.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ jobs:
8686
TEST_ARGS: ${{ matrix.test_args || '' }}
8787
PYTEST_WORKERS: 'auto'
8888
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
89-
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
9089
# Clipboard tests
9190
QT_QPA_PLATFORM: offscreen
9291
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ jobs:
156156
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
157157

158158
- name: Build wheels
159-
uses: pypa/cibuildwheel@v2.21.0
159+
uses: pypa/cibuildwheel@v2.21.3
160160
with:
161161
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
162162
env:

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ minimum_pre_commit_version: 2.15.0
22
exclude: ^LICENSES/|\.(html|csv|svg)$
33
# reserve "manual" for relatively slow hooks which we still want to run in CI
44
default_stages: [
5-
commit,
6-
merge-commit,
7-
push,
5+
pre-commit,
6+
pre-merge-commit,
7+
pre-push,
88
prepare-commit-msg,
99
commit-msg,
1010
post-checkout,

ci/code_checks.sh

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8585
-i "pandas.Timestamp.resolution PR02" \
8686
-i "pandas.Timestamp.tzinfo GL08" \
8787
-i "pandas.Timestamp.year GL08" \
88-
-i "pandas.api.types.is_integer PR01,SA01" \
89-
-i "pandas.api.types.is_iterator PR07,SA01" \
9088
-i "pandas.api.types.is_re_compilable PR07,SA01" \
9189
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
9290
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
@@ -123,11 +121,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
123121
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \
124122
-i "pandas.core.resample.Resampler.sem SA01" \
125123
-i "pandas.core.resample.Resampler.std SA01" \
126-
-i "pandas.core.resample.Resampler.sum SA01" \
127124
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
128125
-i "pandas.core.resample.Resampler.var SA01" \
129126
-i "pandas.errors.AttributeConflictWarning SA01" \
130-
-i "pandas.errors.CSSWarning SA01" \
131127
-i "pandas.errors.ChainedAssignmentError SA01" \
132128
-i "pandas.errors.DataError SA01" \
133129
-i "pandas.errors.DuplicateLabelError SA01" \
@@ -136,22 +132,17 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
136132
-i "pandas.errors.NullFrequencyError SA01" \
137133
-i "pandas.errors.NumExprClobberingError SA01" \
138134
-i "pandas.errors.NumbaUtilError SA01" \
139-
-i "pandas.errors.OptionError SA01" \
140135
-i "pandas.errors.OutOfBoundsTimedelta SA01" \
141136
-i "pandas.errors.PerformanceWarning SA01" \
142137
-i "pandas.errors.PossibleDataLossError SA01" \
143-
-i "pandas.errors.PossiblePrecisionLoss SA01" \
144138
-i "pandas.errors.UndefinedVariableError PR01,SA01" \
145139
-i "pandas.errors.UnsortedIndexError SA01" \
146-
-i "pandas.errors.UnsupportedFunctionCall SA01" \
147140
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
148141
-i "pandas.infer_freq SA01" \
149142
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
150143
-i "pandas.io.stata.StataWriter.write_file SA01" \
151-
-i "pandas.json_normalize RT03,SA01" \
152144
-i "pandas.plotting.andrews_curves RT03,SA01" \
153145
-i "pandas.plotting.scatter_matrix PR07,SA01" \
154-
-i "pandas.set_eng_float_format RT03,SA01" \
155146
-i "pandas.tseries.offsets.BDay PR02,SA01" \
156147
-i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \
157148
-i "pandas.tseries.offsets.BQuarterBegin.n GL08" \
@@ -297,7 +288,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
297288
-i "pandas.tseries.offsets.Second.is_on_offset GL08" \
298289
-i "pandas.tseries.offsets.Second.n GL08" \
299290
-i "pandas.tseries.offsets.Second.normalize GL08" \
300-
-i "pandas.tseries.offsets.SemiMonthBegin SA01" \
301291
-i "pandas.tseries.offsets.SemiMonthBegin.day_of_month GL08" \
302292
-i "pandas.tseries.offsets.SemiMonthBegin.is_on_offset GL08" \
303293
-i "pandas.tseries.offsets.SemiMonthBegin.n GL08" \

ci/deps/actions-311-pyarrownightly.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818

1919
# required dependencies
2020
- python-dateutil
21-
- numpy<2
21+
- numpy
2222
- pip
2323

2424
- pip:

doc/source/development/contributing_codebase.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,12 @@ So, before actually writing any code, you should write your tests. Often the te
298298
taken from the original GitHub issue. However, it is always worth considering additional
299299
use cases and writing corresponding tests.
300300

301+
We use `code coverage <https://en.wikipedia.org/wiki/Code_coverage>`_ to help understand
302+
the amount of code which is covered by a test. We recommend striving to ensure code
303+
you add or change within pandas is covered by a test. Please see our
304+
`code coverage dashboard through Codecov <https://app.codecov.io/github/pandas-dev/pandas>`_
305+
for more information.
306+
301307
Adding tests is one of the most common requests after code is pushed to pandas. Therefore,
302308
it is worth getting in the habit of writing tests ahead of time so this is never an issue.
303309

doc/source/user_guide/cookbook.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ These are some neat pandas ``idioms``
3535
)
3636
df
3737
38-
if-then...
38+
If-then...
3939
**********
4040

4141
An if-then on one column
@@ -176,7 +176,7 @@ One could hard code:
176176
Selection
177177
---------
178178

179-
Dataframes
179+
DataFrames
180180
**********
181181

182182
The :ref:`indexing <indexing>` docs.
@@ -1489,7 +1489,7 @@ of the data values:
14891489
)
14901490
df
14911491
1492-
Constant series
1492+
Constant Series
14931493
---------------
14941494

14951495
To assess if a series has a constant value, we can check if ``series.nunique() <= 1``.

doc/source/user_guide/gotchas.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ Below is how to check if any of the values are ``True``:
121121
if pd.Series([False, True, False]).any():
122122
print("I am any")
123123
124-
Bitwise boolean
124+
Bitwise Boolean
125125
~~~~~~~~~~~~~~~
126126

127127
Bitwise boolean operators like ``==`` and ``!=`` return a boolean :class:`Series`

doc/source/user_guide/groupby.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ this will make an extra copy.
618618
619619
.. _groupby.aggregate.udf:
620620

621-
Aggregation with User-Defined Functions
621+
Aggregation with user-defined functions
622622
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
623623

624624
Users can also provide their own User-Defined Functions (UDFs) for custom aggregations.
@@ -1261,7 +1261,7 @@ with
12611261
df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
12621262
12631263
1264-
Numba Accelerated Routines
1264+
Numba accelerated routines
12651265
--------------------------
12661266

12671267
.. versionadded:: 1.1
@@ -1696,7 +1696,7 @@ introduction <categorical>` and the
16961696
16971697
dfg.groupby(["A", [0, 0, 0, 1, 1]]).ngroup()
16981698
1699-
Groupby by indexer to 'resample' data
1699+
GroupBy by indexer to 'resample' data
17001700
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17011701

17021702
Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.

doc/source/user_guide/integer_na.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ Reduction and groupby operations such as :meth:`~DataFrame.sum` work as well.
147147
df.sum()
148148
df.groupby("B").A.sum()
149149
150-
Scalar NA Value
150+
Scalar NA value
151151
---------------
152152

153153
:class:`arrays.IntegerArray` uses :attr:`pandas.NA` as its scalar

doc/source/user_guide/io.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5996,7 +5996,7 @@ Full documentation can be found `here <https://pandas-gbq.readthedocs.io/en/late
59965996

59975997
.. _io.stata:
59985998

5999-
Stata format
5999+
STATA format
60006000
------------
60016001

60026002
.. _io.stata_writer:

doc/source/whatsnew/v1.0.2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Fixed regressions
4747

4848
.. ---------------------------------------------------------------------------
4949
50-
Indexing with nullable boolean arrays
50+
Indexing with nullable Boolean arrays
5151
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5252

5353
Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`)

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ enhancement1
3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
3434

35-
-
35+
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
3636
-
3737

3838
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,7 @@ Performance improvements
592592
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
593593
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
594594
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
595+
- Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`)
595596
- Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`)
596597
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
597598
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)

pandas/_config/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ class OptionError(AttributeError, KeyError):
105105
106106
Backwards compatible with KeyError checks.
107107
108+
See Also
109+
--------
110+
options : Access and modify global pandas settings.
111+
108112
Examples
109113
--------
110114
>>> pd.options.context

pandas/_libs/lib.pyx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,15 +259,23 @@ def is_iterator(obj: object) -> bool:
259259
Check if the object is an iterator.
260260

261261
This is intended for generators, not list-like objects.
262+
This method checks whether the passed object is an iterator. It
263+
returns `True` if the object is an iterator, and `False` otherwise.
262264

263265
Parameters
264266
----------
265267
obj : The object to check
268+
The object to check for iterator type.
266269

267270
Returns
268271
-------
269272
is_iter : bool
270273
Whether `obj` is an iterator.
274+
`True` if the object is of iterator type, otherwise `False`.
275+
276+
See Also
277+
--------
278+
api.types.is_list_like : Check if the input is list-like.
271279

272280
Examples
273281
--------
@@ -1122,9 +1130,23 @@ def is_integer(obj: object) -> bool:
11221130
"""
11231131
Return True if given object is integer.
11241132

1133+
This method checks whether the passed object is an integer type. It
1134+
returns `True` if the object is an integer, and `False` otherwise.
1135+
1136+
Parameters
1137+
----------
1138+
obj : object
1139+
The object to check for integer type.
1140+
11251141
Returns
11261142
-------
11271143
bool
1144+
`True` if the object is of integer type, otherwise `False`.
1145+
1146+
See Also
1147+
--------
1148+
api.types.is_float : Check if an object is of float type.
1149+
api.types.is_numeric_dtype : Check if an object is of numeric type.
11281150

11291151
Examples
11301152
--------

pandas/_libs/tslibs/offsets.pyx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3371,6 +3371,10 @@ cdef class SemiMonthBegin(SemiMonthOffset):
33713371
"""
33723372
Two DateOffset's per month repeating on the first day of the month & day_of_month.
33733373
3374+
This offset moves dates to the first day of the month and an additional specified
3375+
day (typically the 15th by default), useful in scenarios where bi-monthly processing
3376+
occurs on set days.
3377+
33743378
Attributes
33753379
----------
33763380
n : int, default 1
@@ -3380,6 +3384,13 @@ cdef class SemiMonthBegin(SemiMonthOffset):
33803384
day_of_month : int, {1, 3,...,27}, default 15
33813385
A specific integer for the day of the month.
33823386
3387+
See Also
3388+
--------
3389+
tseries.offsets.SemiMonthEnd : Two DateOffset's per month repeating on the last day
3390+
of the month & day_of_month.
3391+
tseries.offsets.MonthEnd : Offset to the last calendar day of the month.
3392+
tseries.offsets.MonthBegin : Offset to the first calendar day of the month.
3393+
33833394
Examples
33843395
--------
33853396
>>> ts = pd.Timestamp(2022, 1, 1)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,10 +1864,12 @@ class Timedelta(_Timedelta):
18641864
18651865
Parameters
18661866
----------
1867-
value : Timedelta, timedelta, np.timedelta64, str, or int
1867+
value : Timedelta, timedelta, np.timedelta64, str, int or float
18681868
Input value.
18691869
unit : str, default 'ns'
1870-
Denote the unit of the input, if input is an integer.
1870+
If input is an integer, denote the unit of the input.
1871+
If input is a float, denote the unit of the integer parts.
1872+
The decimal parts with resolution lower than 1 nanosecond are ignored.
18711873
18721874
Possible values:
18731875
@@ -2176,8 +2178,10 @@ class Timedelta(_Timedelta):
21762178
Parameters
21772179
----------
21782180
freq : str
2179-
Frequency string indicating the ceiling resolution.
2180-
It uses the same units as class constructor :class:`~pandas.Timedelta`.
2181+
Frequency string indicating the ceiling resolution. Must be a fixed
2182+
frequency like 's' (second) not 'ME' (month end). See
2183+
:ref:`frequency aliases <timeseries.offset_aliases>` for
2184+
a list of possible `freq` values.
21812185
21822186
Returns
21832187
-------

pandas/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,8 @@ def _create_mi_with_dt64tz_level():
667667

668668

669669
indices_dict = {
670-
"string": Index([f"pandas_{i}" for i in range(10)]),
670+
"object": Index([f"pandas_{i}" for i in range(10)], dtype=object),
671+
"string": Index([f"pandas_{i}" for i in range(10)], dtype="str"),
671672
"datetime": date_range("2020-01-01", periods=10),
672673
"datetime-tz": date_range("2020-01-01", periods=10, tz="US/Pacific"),
673674
"period": period_range("2020-01-01", periods=10, freq="D"),

pandas/core/array_algos/masked_reductions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ def _reductions(
6262
):
6363
return libmissing.NA
6464

65+
if values.dtype == np.dtype(object):
66+
# object dtype does not support `where` without passing an initial
67+
values = values[~mask]
68+
return func(values, axis=axis, **kwargs)
6569
return func(values, where=~mask, axis=axis, **kwargs)
6670

6771

pandas/core/arrays/arrow/array.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
unpack_tuple_and_ellipses,
6969
validate_indices,
7070
)
71+
from pandas.core.nanops import check_below_min_count
7172
from pandas.core.strings.base import BaseStringArrayMethods
7273

7374
from pandas.io._util import _arrow_dtype_mapping
@@ -1705,6 +1706,37 @@ def pyarrow_meth(data, skip_nulls, **kwargs):
17051706
denominator = pc.sqrt_checked(pc.count(self._pa_array))
17061707
return pc.divide_checked(numerator, denominator)
17071708

1709+
elif name == "sum" and (
1710+
pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type)
1711+
):
1712+
1713+
def pyarrow_meth(data, skip_nulls, min_count=0): # type: ignore[misc]
1714+
mask = pc.is_null(data) if data.null_count > 0 else None
1715+
if skip_nulls:
1716+
if min_count > 0 and check_below_min_count(
1717+
(len(data),),
1718+
None if mask is None else mask.to_numpy(),
1719+
min_count,
1720+
):
1721+
return pa.scalar(None, type=data.type)
1722+
if data.null_count > 0:
1723+
# binary_join returns null if there is any null ->
1724+
# have to filter out any nulls
1725+
data = data.filter(pc.invert(mask))
1726+
else:
1727+
if mask is not None or check_below_min_count(
1728+
(len(data),), None, min_count
1729+
):
1730+
return pa.scalar(None, type=data.type)
1731+
1732+
if pa.types.is_large_string(data.type):
1733+
# binary_join only supports string, not large_string
1734+
data = data.cast(pa.string())
1735+
data_list = pa.ListArray.from_arrays(
1736+
[0, len(data)], data.combine_chunks()
1737+
)[0]
1738+
return pc.binary_join(data_list, "")
1739+
17081740
else:
17091741
pyarrow_name = {
17101742
"median": "quantile",

0 commit comments

Comments
 (0)