
ENH: Add numba engine to rolling/expanding.std/var #44461


Merged: 26 commits, Nov 26, 2021

Changes from all commits (26 commits):
76aa92e - ENH: Add numba engine to rolling.var (mroeschke, Nov 15, 2021)
450e601 - Fix typing (mroeschke, Nov 15, 2021)
d9391fe - Add std, support multiple versions in numba args docstring (mroeschke, Nov 15, 2021)
5b7b448 - Fix tests for std (mroeschke, Nov 15, 2021)
f3e7e69 - Replace issue number in whatsnew (mroeschke, Nov 15, 2021)
91bd851 - Add benchmarks (mroeschke, Nov 15, 2021)
443d21e - Ensure args are keyword only (mroeschke, Nov 15, 2021)
205576e - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 15, 2021)
d718280 - Split calls (mroeschke, Nov 15, 2021)
ec2664a - Fix ordering of parameters (mroeschke, Nov 15, 2021)
c20e6ae - fix doc ordering in expanding (mroeschke, Nov 15, 2021)
0f4ed4f - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 16, 2021)
0b42f61 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 17, 2021)
3b0138c - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 17, 2021)
afe6ae4 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 19, 2021)
568cc06 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 20, 2021)
eef2290 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 20, 2021)
5b659d7 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 20, 2021)
99d44cf - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 20, 2021)
699cc8e - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 21, 2021)
0e24636 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 22, 2021)
63751f9 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 23, 2021)
0ba4077 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 24, 2021)
a128a78 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 25, 2021)
6202d65 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 26, 2021)
c8ca055 - Merge remote-tracking branch 'upstream/master' into enh/numba_var (mroeschke, Nov 26, 2021)
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/rolling.py
@@ -53,7 +53,7 @@ class NumbaEngineMethods:
["DataFrame", "Series"],
["int", "float"],
[("rolling", {"window": 10}), ("expanding", {})],
["sum", "max", "min", "median", "mean"],
["sum", "max", "min", "median", "mean", "var", "std"],
[True, False],
[None, 100],
)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
@@ -214,6 +214,7 @@ Other enhancements
- :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
- :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`)
- New option ``display.max_dir_items`` customizes the number of columns added to :meth:`Dataframe.__dir__` and suggested for tab completion (:issue:`37996`)
- :meth:`.Rolling.var`, :meth:`.Expanding.var`, :meth:`.Rolling.std`, :meth:`.Expanding.std` now support `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`44461`)
Contributor:

At some point we should group these doc-string changes together (a separate section would be OK too, but it's not required).

Member Author (mroeschke):

Yeah, once I add median I can group these better.

.. ---------------------------------------------------------------------------
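As a quick illustration of the enhancement noted above (a minimal sketch, assuming numba is installed; the series and window size here are arbitrary), the new keyword is accepted by both the rolling and expanding variants:

import numpy as np
import pandas as pd

ser = pd.Series(np.random.default_rng(0).standard_normal(10_000))

# The first numba call pays a JIT-compilation cost; later calls reuse the compiled kernel.
cython_result = ser.rolling(10).var()
numba_result = ser.rolling(10).var(engine="numba")

# assert_series_equal compares floats with a tolerance by default, so small
# engine-to-engine differences would not trip it.
pd.testing.assert_series_equal(cython_result, numba_result)

# std and the expanding variants accept the same keyword.
ser.expanding().std(engine="numba")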
3 changes: 2 additions & 1 deletion pandas/core/_numba/executor.py
@@ -51,10 +51,11 @@ def column_looper(
start: np.ndarray,
end: np.ndarray,
min_periods: int,
*args,
):
result = np.empty((len(start), values.shape[1]), dtype=np.float64)
for i in numba.prange(values.shape[1]):
result[:, i] = func(values[:, i], start, end, min_periods)
result[:, i] = func(values[:, i], start, end, min_periods, *args)
return result

return column_looper
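The new *args pass-through is what lets kernels that take extra parameters (such as ddof in the sliding_var kernel added below) run through the same generated looper. A simplified, non-jitted sketch of the pattern with a toy kernel (the names here are illustrative, not the pandas API):

import numpy as np

def make_column_looper(func):
    # Simplified stand-in for the jitted looper above: apply `func` column by
    # column, forwarding any extra kernel arguments untouched.
    def column_looper(values, start, end, min_periods, *args):
        result = np.empty((len(start), values.shape[1]), dtype=np.float64)
        for i in range(values.shape[1]):
            result[:, i] = func(values[:, i], start, end, min_periods, *args)
        return result
    return column_looper

def sliding_power_sum(values, start, end, min_periods, p):
    # Toy kernel with one extra parameter: sum of values ** p in each window.
    out = np.empty(len(start), dtype=np.float64)
    for i in range(len(start)):
        window = values[start[i]:end[i]]
        out[i] = (window ** p).sum() if len(window) >= min_periods else np.nan
    return out

vals = np.arange(12, dtype=np.float64).reshape(6, 2)
start = np.array([0, 0, 1, 2, 3, 4])
end = np.array([1, 2, 3, 4, 5, 6])
looper = make_column_looper(sliding_power_sum)
res = looper(vals, start, end, 1, 2)  # the trailing 2 is forwarded as the kernel's `p`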
3 changes: 2 additions & 1 deletion pandas/core/_numba/kernels/__init__.py
@@ -1,4 +1,5 @@
from pandas.core._numba.kernels.mean_ import sliding_mean
from pandas.core._numba.kernels.sum_ import sliding_sum
from pandas.core._numba.kernels.var_ import sliding_var

__all__ = ["sliding_mean", "sliding_sum"]
__all__ = ["sliding_mean", "sliding_sum", "sliding_var"]
116 changes: 116 additions & 0 deletions pandas/core/_numba/kernels/var_.py
@@ -0,0 +1,116 @@
"""
Numba 1D var kernels that can be shared by
* Dataframe / Series
* groupby
* rolling / expanding

Mirrors pandas/_libs/window/aggregation.pyx
"""
from __future__ import annotations

import numba
import numpy as np

from pandas.core._numba.kernels.shared import is_monotonic_increasing


@numba.jit(nopython=True, nogil=True, parallel=False)
def add_var(
val: float, nobs: int, mean_x: float, ssqdm_x: float, compensation: float
) -> tuple[int, float, float, float]:
if not np.isnan(val):
nobs += 1
prev_mean = mean_x - compensation
y = val - compensation
t = y - mean_x
compensation = t + mean_x - y
delta = t
if nobs:
mean_x += delta / nobs
else:
mean_x = 0
ssqdm_x += (val - prev_mean) * (val - mean_x)
return nobs, mean_x, ssqdm_x, compensation


@numba.jit(nopython=True, nogil=True, parallel=False)
def remove_var(
val: float, nobs: int, mean_x: float, ssqdm_x: float, compensation: float
) -> tuple[int, float, float, float]:
if not np.isnan(val):
nobs -= 1
if nobs:
prev_mean = mean_x - compensation
y = val - compensation
t = y - mean_x
compensation = t + mean_x - y
delta = t
mean_x -= delta / nobs
ssqdm_x -= (val - prev_mean) * (val - mean_x)
else:
mean_x = 0
ssqdm_x = 0
return nobs, mean_x, ssqdm_x, compensation


@numba.jit(nopython=True, nogil=True, parallel=False)
def sliding_var(
values: np.ndarray,
start: np.ndarray,
end: np.ndarray,
min_periods: int,
ddof: int = 1,
) -> np.ndarray:
N = len(start)
nobs = 0
mean_x = 0.0
ssqdm_x = 0.0
compensation_add = 0.0
compensation_remove = 0.0

min_periods = max(min_periods, 1)
is_monotonic_increasing_bounds = is_monotonic_increasing(
start
) and is_monotonic_increasing(end)

output = np.empty(N, dtype=np.float64)

for i in range(N):
s = start[i]
e = end[i]
if i == 0 or not is_monotonic_increasing_bounds:
for j in range(s, e):
val = values[j]
nobs, mean_x, ssqdm_x, compensation_add = add_var(
val, nobs, mean_x, ssqdm_x, compensation_add
)
else:
for j in range(start[i - 1], s):
val = values[j]
nobs, mean_x, ssqdm_x, compensation_remove = remove_var(
val, nobs, mean_x, ssqdm_x, compensation_remove
)

for j in range(end[i - 1], e):
val = values[j]
nobs, mean_x, ssqdm_x, compensation_add = add_var(
val, nobs, mean_x, ssqdm_x, compensation_add
)

if nobs >= min_periods and nobs > ddof:
if nobs == 1:
result = 0.0
else:
result = ssqdm_x / (nobs - ddof)
else:
result = np.nan

output[i] = result

if not is_monotonic_increasing_bounds:
nobs = 0
mean_x = 0.0
ssqdm_x = 0.0
compensation_remove = 0.0

return output
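The kernel keeps Welford-style running statistics (nobs, mean_x, and the sum of squared deviations ssqdm_x) with Kahan compensation terms, adding values that enter the window and removing values that leave it, so each step is an incremental update rather than a full recomputation; per the module docstring it mirrors the Cython window aggregation code. A brute-force cross-check of the rolling result (a sketch, assuming numba is installed):

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
ser = pd.Series(rng.standard_normal(1_000))
window = 20

numba_var = ser.rolling(window).var(engine="numba")

# Reference: recompute the sample variance (ddof=1) from scratch in each window.
reference = ser.rolling(window).apply(lambda x: np.var(x, ddof=1), raw=True)

# Leading windows with fewer than `window` observations are NaN in both results;
# assert_allclose treats matching NaNs as equal by default.
np.testing.assert_allclose(
    numba_var.to_numpy(), reference.to_numpy(), rtol=1e-9, atol=1e-12
)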
16 changes: 11 additions & 5 deletions pandas/core/window/doc.py
@@ -98,14 +98,17 @@ def create_section_header(header: str) -> str:
"extended documentation and performance considerations for the Numba engine.\n\n"
)

window_agg_numba_parameters = dedent(
"""

def window_agg_numba_parameters(version: str = "1.3") -> str:
return (
dedent(
"""
engine : str, default None
* ``'cython'`` : Runs the operation through C-extensions from cython.
* ``'numba'`` : Runs the operation through JIT compiled code from numba.
* ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba``

.. versionadded:: 1.3.0
.. versionadded:: {version}.0

engine_kwargs : dict, default None
* For ``'cython'`` engine, there are no accepted ``engine_kwargs``
@@ -114,6 +117,9 @@ def create_section_header(header: str) -> str:
``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
``{{'nopython': True, 'nogil': False, 'parallel': False}}``

.. versionadded:: 1.3.0\n
.. versionadded:: {version}.0\n
"""
).replace("\n", "", 1)
)
.replace("\n", "", 1)
.replace("{version}", version)
)
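With the shared docstring template turned into a function of the version string, each call site can stamp the release in which numba support arrived. A tiny demonstration (window_agg_numba_parameters lives in the private pandas.core.window.doc module, shown here only for illustration):

from pandas.core.window.doc import window_agg_numba_parameters

# Methods that gained numba support in 1.3 keep the old marker,
# while the newly supported std/var call sites pass "1.4".
assert ".. versionadded:: 1.3.0" in window_agg_numba_parameters()
assert ".. versionadded:: 1.4.0" in window_agg_numba_parameters("1.4")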
4 changes: 2 additions & 2 deletions pandas/core/window/ewm.py
@@ -511,7 +511,7 @@ def aggregate(self, func, *args, **kwargs):
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
window_agg_numba_parameters(),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -565,7 +565,7 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
window_agg_numba_parameters(),
kwargs_compat,
create_section_header("Returns"),
template_returns,
38 changes: 29 additions & 9 deletions pandas/core/window/expanding.py
@@ -227,7 +227,7 @@ def apply(
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
window_agg_numba_parameters(),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -253,7 +253,7 @@ def sum(
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
window_agg_numba_parameters(),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -279,7 +279,7 @@ def max(
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
window_agg_numba_parameters(),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -305,7 +305,7 @@ def min(
template_header,
create_section_header("Parameters"),
args_compat,
window_agg_numba_parameters,
window_agg_numba_parameters(),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -330,7 +330,7 @@ def mean(
@doc(
template_header,
create_section_header("Parameters"),
window_agg_numba_parameters,
window_agg_numba_parameters(),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -361,6 +361,7 @@ def median(
"""
).replace("\n", "", 1),
args_compat,
window_agg_numba_parameters("1.4"),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -396,9 +397,18 @@ def median(
aggregation_description="standard deviation",
agg_method="std",
)
def std(self, ddof: int = 1, *args, **kwargs):
def std(
self,
ddof: int = 1,
*args,
engine: str | None = None,
engine_kwargs: dict[str, bool] | None = None,
**kwargs,
):
nv.validate_expanding_func("std", args, kwargs)
return super().std(ddof=ddof, **kwargs)
return super().std(
ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs
)

@doc(
template_header,
@@ -411,6 +421,7 @@ def std(self, ddof: int = 1, *args, **kwargs):
"""
).replace("\n", "", 1),
args_compat,
window_agg_numba_parameters("1.4"),
kwargs_compat,
create_section_header("Returns"),
template_returns,
@@ -446,9 +457,18 @@ def std(self, ddof: int = 1, *args, **kwargs):
aggregation_description="variance",
agg_method="var",
)
def var(self, ddof: int = 1, *args, **kwargs):
def var(
self,
ddof: int = 1,
*args,
engine: str | None = None,
engine_kwargs: dict[str, bool] | None = None,
**kwargs,
):
nv.validate_expanding_func("var", args, kwargs)
return super().var(ddof=ddof, **kwargs)
return super().var(
ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs
)

@doc(
template_header,
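To round out the expanding.py changes, a brief usage sketch (assuming numba is installed): ddof stays an ordinary parameter, engine and engine_kwargs are keyword-only, and the accepted engine_kwargs keys are the ones listed in the shared docstring template:

import numpy as np
import pandas as pd

ser = pd.Series(np.random.default_rng(1).standard_normal(5_000))

result = ser.expanding().var(
    ddof=1,
    engine="numba",
    engine_kwargs={"nopython": True, "nogil": False, "parallel": False},
)

# std accepts the same keywords; min_periods controls where output begins.
ser.expanding(min_periods=5).std(engine="numba")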