-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Add numeric_only to certain groupby ops #46728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 9 commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
f7b76f2
ENH: Add numeric_only to certain groupby ops
rhshadrach d6129de
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach d434816
Fix type-hint
rhshadrach ebf777a
test fixup
rhshadrach bb424ba
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach de2e7b3
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach 58e9ddc
fixup
rhshadrach 134313e
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach 6921c94
Merge branch 'add_numeric_only_gb' of https://github.com/rhshadrach/p…
rhshadrach 8e435ab
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach bc53f90
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach 88caf9b
Simplify var
rhshadrach f2bee5f
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach dbd81cd
fixup
rhshadrach b176238
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_…
rhshadrach c79c700
fixup
rhshadrach File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1502,7 +1502,7 @@ def _python_apply_general( | |
) | ||
|
||
@final | ||
def _python_agg_general(self, func, *args, **kwargs): | ||
def _python_agg_general(self, func, *args, raise_on_typeerror=False, **kwargs): | ||
func = com.is_builtin_func(func) | ||
f = lambda x: func(x, *args, **kwargs) | ||
|
||
|
@@ -1520,6 +1520,8 @@ def _python_agg_general(self, func, *args, **kwargs): | |
# if this function is invalid for this dtype, we will ignore it. | ||
result = self.grouper.agg_series(obj, f) | ||
except TypeError: | ||
if raise_on_typeerror: | ||
raise | ||
warn_dropping_nuisance_columns_deprecated(type(self), "agg") | ||
continue | ||
|
||
|
@@ -1593,7 +1595,12 @@ def _agg_py_fallback( | |
|
||
@final | ||
def _cython_agg_general( | ||
self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1 | ||
self, | ||
how: str, | ||
alt: Callable, | ||
numeric_only: bool, | ||
min_count: int = -1, | ||
ignore_failures: bool = True, | ||
): | ||
# Note: we never get here with how="ohlc" for DataFrameGroupBy; | ||
# that goes through SeriesGroupBy | ||
|
@@ -1629,7 +1636,7 @@ def array_func(values: ArrayLike) -> ArrayLike: | |
|
||
# TypeError -> we may have an exception in trying to aggregate | ||
# continue and exclude the block | ||
new_mgr = data.grouped_reduce(array_func, ignore_failures=True) | ||
new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures) | ||
|
||
if not is_ser and len(new_mgr) < len(data): | ||
warn_dropping_nuisance_columns_deprecated(type(self), how) | ||
|
@@ -2041,6 +2048,7 @@ def std( | |
ddof: int = 1, | ||
engine: str | None = None, | ||
engine_kwargs: dict[str, bool] | None = None, | ||
numeric_only: bool | lib.NoDefault = lib.no_default, | ||
): | ||
""" | ||
Compute standard deviation of groups, excluding missing values. | ||
|
@@ -2069,6 +2077,11 @@ def std( | |
|
||
.. versionadded:: 1.4.0 | ||
|
||
numeric_only : bool, default True | ||
Include only `float`, `int` or `boolean` data. | ||
|
||
.. versionadded:: 1.5.0 | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
|
@@ -2081,8 +2094,9 @@ def std( | |
else: | ||
return self._get_cythonized_result( | ||
libgroupby.group_var, | ||
needs_counts=True, | ||
cython_dtype=np.dtype(np.float64), | ||
numeric_only=numeric_only, | ||
needs_counts=True, | ||
post_processing=lambda vals, inference: np.sqrt(vals), | ||
ddof=ddof, | ||
) | ||
|
@@ -2095,6 +2109,7 @@ def var( | |
ddof: int = 1, | ||
engine: str | None = None, | ||
engine_kwargs: dict[str, bool] | None = None, | ||
numeric_only: bool | lib.NoDefault = lib.no_default, | ||
): | ||
""" | ||
Compute variance of groups, excluding missing values. | ||
|
@@ -2123,6 +2138,11 @@ def var( | |
|
||
.. versionadded:: 1.4.0 | ||
|
||
numeric_only : bool, default True | ||
Include only `float`, `int` or `boolean` data. | ||
|
||
.. versionadded:: 1.5.0 | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
|
@@ -2133,22 +2153,37 @@ def var( | |
|
||
return self._numba_agg_general(sliding_var, engine_kwargs, ddof) | ||
else: | ||
ignore_failures = numeric_only is lib.no_default | ||
numeric_only = self._resolve_numeric_only(numeric_only) | ||
if ddof == 1: | ||
numeric_only = self._resolve_numeric_only(lib.no_default) | ||
return self._cython_agg_general( | ||
"var", | ||
alt=lambda x: Series(x).var(ddof=ddof), | ||
numeric_only=numeric_only, | ||
ignore_failures=ignore_failures, | ||
) | ||
else: | ||
func = lambda x: x.var(ddof=ddof) | ||
with self._group_selection_context(): | ||
return self._python_agg_general(func) | ||
if numeric_only: | ||
nonnumeric_exclusions = frozenset( | ||
self.obj.columns.difference(self.exclusions).difference( | ||
self.obj._get_numeric_data().columns | ||
) | ||
) | ||
else: | ||
nonnumeric_exclusions = frozenset() | ||
with com.temp_setattr( | ||
self, "exclusions", self.exclusions | nonnumeric_exclusions | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the purpose here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks - I was able to remove this hack by setting the appropriate value of raise_on_typeerror. |
||
): | ||
with self._group_selection_context(): | ||
return self._python_agg_general( | ||
func, raise_on_typeerror=not ignore_failures | ||
) | ||
|
||
@final | ||
@Substitution(name="groupby") | ||
@Appender(_common_see_also) | ||
def sem(self, ddof: int = 1): | ||
def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default): | ||
""" | ||
Compute standard error of the mean of groups, excluding missing values. | ||
|
||
|
@@ -2159,12 +2194,17 @@ def sem(self, ddof: int = 1): | |
ddof : int, default 1 | ||
Degrees of freedom. | ||
|
||
numeric_only : bool, default True | ||
Include only `float`, `int` or `boolean` data. | ||
|
||
.. versionadded:: 1.5.0 | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
Standard error of the mean of values within each group. | ||
""" | ||
result = self.std(ddof=ddof) | ||
result = self.std(ddof=ddof, numeric_only=numeric_only) | ||
if result.ndim == 1: | ||
result /= np.sqrt(self.count()) | ||
else: | ||
|
@@ -2968,7 +3008,12 @@ def nth( | |
return result | ||
|
||
@final | ||
def quantile(self, q=0.5, interpolation: str = "linear"): | ||
def quantile( | ||
self, | ||
q=0.5, | ||
interpolation: str = "linear", | ||
numeric_only: bool | lib.NoDefault = lib.no_default, | ||
): | ||
""" | ||
Return group values at the given quantile, a la numpy.percentile. | ||
|
||
|
@@ -2978,6 +3023,10 @@ def quantile(self, q=0.5, interpolation: str = "linear"): | |
Value(s) between 0 and 1 providing the quantile(s) to compute. | ||
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} | ||
Method to use when the desired quantile falls between two points. | ||
numeric_only : bool, default True | ||
Include only `float`, `int` or `boolean` data. | ||
|
||
.. versionadded:: 1.5.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -3002,6 +3051,7 @@ def quantile(self, q=0.5, interpolation: str = "linear"): | |
a 2.0 | ||
b 3.0 | ||
""" | ||
numeric_only_bool = self._resolve_numeric_only(numeric_only) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: | ||
if is_object_dtype(vals): | ||
|
@@ -3095,9 +3145,15 @@ def blk_func(values: ArrayLike) -> ArrayLike: | |
obj = self._obj_with_exclusions | ||
is_ser = obj.ndim == 1 | ||
mgr = self._get_data_to_aggregate() | ||
|
||
res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) | ||
if not is_ser and len(res_mgr.items) != len(mgr.items): | ||
data = mgr.get_numeric_data() if numeric_only_bool else mgr | ||
ignore_failures = numeric_only_bool | ||
res_mgr = data.grouped_reduce(blk_func, ignore_failures=ignore_failures) | ||
|
||
if ( | ||
numeric_only is lib.no_default | ||
and not is_ser | ||
and len(res_mgr.items) != len(mgr.items) | ||
): | ||
warn_dropping_nuisance_columns_deprecated(type(self), "quantile") | ||
|
||
if len(res_mgr.items) == 0: | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.