-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
WIP: PERF: Cythonize fillna #42309
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: PERF: Cythonize fillna #42309
Changes from 8 commits
e5e87fe
f60d43e
55ca1f1
18eee84
21fcbd2
2d785de
77c0563
e004991
af5100b
13fe232
f425065
433e698
f6b8f56
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -434,40 +434,56 @@ def fillna( | |
fillna on the block with the value. If we fail, then convert to | ||
ObjectBlock and try again | ||
""" | ||
# TODO: Handle inf_as_na, we need to get option and pass to cython funcs | ||
inplace = validate_bool_kwarg(inplace, "inplace") | ||
|
||
mask = isna(self.values) | ||
mask, noop = validate_putmask(self.values, mask) | ||
|
||
if limit is not None: | ||
limit = libalgos.validate_limit(None, limit=limit) | ||
mask[mask.cumsum(self.ndim - 1) > limit] = False | ||
arr = self if inplace else self.copy() | ||
limit = libalgos.validate_limit( | ||
len(self) if self.ndim == 1 else self.shape[1], limit=limit | ||
lithomas1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
if not self._can_hold_na: | ||
if inplace: | ||
return [self] | ||
return [arr] | ||
|
||
if not self.is_extension: | ||
if self._can_hold_element(value): | ||
if self.ndim == 1: | ||
if is_list_like(value): | ||
# TODO: Verify EA case | ||
if is_extension_array_dtype(value): | ||
mask = value.isna() | ||
value = np.asarray(value[mask], dtype=object) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. going straight to object may be unnecessarily heavy for e.g. Categorical |
||
libalgos.fillna1d_multi_values( | ||
arr.values[mask], value=value, limit=limit | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is going to fill arr.values[mask] inplace, but i dont think arr.values[mask] shares data with arr.values? |
||
) | ||
else: | ||
libalgos.fillna1d_multi_values( | ||
arr.values, value=value, limit=limit | ||
) | ||
else: | ||
libalgos.fillna1d(arr.values, value=value, limit=limit) | ||
else: | ||
libalgos.fillna2d(arr.values, value=value, limit=limit) | ||
return arr._maybe_downcast([arr], downcast) | ||
elif self.ndim == 1 or self.shape[0] == 1: | ||
blk = self.coerce_to_target_dtype(value) | ||
# bc we have already cast, inplace=True may avoid an extra copy | ||
return blk.fillna(value, limit=limit, inplace=True, downcast=None) | ||
else: | ||
return [self.copy()] | ||
|
||
if self._can_hold_element(value): | ||
nb = self if inplace else self.copy() | ||
putmask_inplace(nb.values, mask, value) | ||
return nb._maybe_downcast([nb], downcast) | ||
|
||
if noop: | ||
# we can't process the value, but nothing to do | ||
return [self] if inplace else [self.copy()] | ||
|
||
elif self.ndim == 1 or self.shape[0] == 1: | ||
blk = self.coerce_to_target_dtype(value) | ||
# bc we have already cast, inplace=True may avoid an extra copy | ||
return blk.fillna(value, limit=limit, inplace=True, downcast=None) | ||
|
||
# operate column-by-column | ||
return self.split_and_operate( | ||
type(self).fillna, | ||
value, | ||
limit=limit, | ||
inplace=inplace, | ||
downcast=None, | ||
) | ||
else: | ||
# operate column-by-column | ||
return self.split_and_operate( | ||
type(self).fillna, value, limit=limit, inplace=inplace, downcast=None | ||
) | ||
# TODO: This seems to work for EAS, verify it does | ||
return [ | ||
self.make_block_same_class( | ||
values=self.values.fillna(value=value, limit=limit) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you'll probably need to make NDArrayBackedExtensionArray.fillna use the new cython function(s) |
||
) | ||
] | ||
|
||
@final | ||
def _split(self) -> list[Block]: | ||
|
Uh oh!
There was an error while loading. Please reload this page.