-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
TYP: groupby, sorting #46133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TYP: groupby, sorting #46133
Changes from all commits
c1feca1
4f221a0
c3b9fc8
51cb445
c1ec2f9
877761e
42a60b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -224,7 +224,9 @@ def is_int64_overflow_possible(shape: Shape) -> bool: | |
return the_prod >= lib.i8max | ||
|
||
|
||
def decons_group_index(comp_labels, shape: Shape): | ||
def _decons_group_index( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just curious about the rename here - why specifically make this one lead with an underscore? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the only thing that stood out to me as being privatize-able. |
||
comp_labels: npt.NDArray[np.intp], shape: Shape | ||
) -> list[npt.NDArray[np.intp]]: | ||
# reconstruct labels | ||
if is_int64_overflow_possible(shape): | ||
# at some point group indices are factorized, | ||
|
@@ -233,7 +235,7 @@ def decons_group_index(comp_labels, shape: Shape): | |
|
||
label_list = [] | ||
factor = 1 | ||
y = 0 | ||
y = np.array(0) | ||
x = comp_labels | ||
for i in reversed(range(len(shape))): | ||
labels = (x - y) % (factor * shape[i]) // factor | ||
|
@@ -245,24 +247,32 @@ def decons_group_index(comp_labels, shape: Shape): | |
|
||
|
||
def decons_obs_group_ids( | ||
comp_ids: npt.NDArray[np.intp], obs_ids, shape: Shape, labels, xnull: bool | ||
): | ||
comp_ids: npt.NDArray[np.intp], | ||
obs_ids: npt.NDArray[np.intp], | ||
shape: Shape, | ||
labels: Sequence[npt.NDArray[np.signedinteger]], | ||
xnull: bool, | ||
) -> list[npt.NDArray[np.intp]]: | ||
""" | ||
Reconstruct labels from observed group ids. | ||
|
||
Parameters | ||
---------- | ||
comp_ids : np.ndarray[np.intp] | ||
obs_ids : np.ndarray[np.intp] | |
shape : tuple[int] | ||
labels : Sequence[np.ndarray[np.signedinteger]] | ||
xnull : bool | ||
If nulls are excluded; i.e. -1 labels are passed through. | ||
""" | ||
if not xnull: | ||
lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8") | ||
shape = np.asarray(shape, dtype="i8") + lift | ||
lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp) | ||
arr_shape = np.asarray(shape, dtype=np.intp) + lift | ||
shape = tuple(arr_shape) | ||
|
||
if not is_int64_overflow_possible(shape): | ||
# obs ids are deconstructable! take the fast route! | ||
out = decons_group_index(obs_ids, shape) | ||
out = _decons_group_index(obs_ids, shape) | ||
return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)] | ||
|
||
indexer = unique_label_indices(comp_ids) | ||
|
Uh oh!
There was an error while loading. Please reload this page.