Skip to content

Commit bbefde5

Browse files
authored
CLN: union_indexes (#58183)
* Clean up logic in union_indexes
* Add typing
* Just use the generator version
* Undo typing
1 parent 2e9e89a commit bbefde5

File tree

3 files changed

+30
-95
lines changed

3 files changed

+30
-95
lines changed

pandas/_libs/lib.pyi

-1
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@ def fast_multiget(
6767
default=...,
6868
) -> ArrayLike: ...
6969
def fast_unique_multiple_list_gen(gen: Generator, sort: bool = ...) -> list: ...
70-
def fast_unique_multiple_list(lists: list, sort: bool | None = ...) -> list: ...
7170
@overload
7271
def map_infer(
7372
arr: np.ndarray,

pandas/_libs/lib.pyx

+3 -31
Original file line number | Diff line number | Diff line change
@@ -312,34 +312,6 @@ def item_from_zerodim(val: object) -> object:
312312
return val
313313

314314

315-
@cython.wraparound(False)
316-
@cython.boundscheck(False)
317-
def fast_unique_multiple_list(lists: list, sort: bool | None = True) -> list:
318-
cdef:
319-
list buf
320-
Py_ssize_t k = len(lists)
321-
Py_ssize_t i, j, n
322-
list uniques = []
323-
dict table = {}
324-
object val, stub = 0
325-
326-
for i in range(k):
327-
buf = lists[i]
328-
n = len(buf)
329-
for j in range(n):
330-
val = buf[j]
331-
if val not in table:
332-
table[val] = stub
333-
uniques.append(val)
334-
if sort:
335-
try:
336-
uniques.sort()
337-
except TypeError:
338-
pass
339-
340-
return uniques
341-
342-
343315
@cython.wraparound(False)
344316
@cython.boundscheck(False)
345317
def fast_unique_multiple_list_gen(object gen, bint sort=True) -> list:
@@ -361,15 +333,15 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True) -> list:
361333
list buf
362334
Py_ssize_t j, n
363335
list uniques = []
364-
dict table = {}
365-
object val, stub = 0
336+
set table = set()
337+
object val
366338

367339
for buf in gen:
368340
n = len(buf)
369341
for j in range(n):
370342
val = buf[j]
371343
if val not in table:
372-
table[val] = stub
344+
table.add(val)
373345
uniques.append(val)
374346
if sort:
375347
try:

pandas/core/indexes/api.py

+27 -63
Original file line number | Diff line number | Diff line change
@@ -209,60 +209,6 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
209209

210210
indexes, kind = _sanitize_and_check(indexes)
211211

212-
def _unique_indices(inds, dtype) -> Index:
213-
"""
214-
Concatenate indices and remove duplicates.
215-
216-
Parameters
217-
----------
218-
inds : list of Index or list objects
219-
dtype : dtype to set for the resulting Index
220-
221-
Returns
222-
-------
223-
Index
224-
"""
225-
if all(isinstance(ind, Index) for ind in inds):
226-
inds = [ind.astype(dtype, copy=False) for ind in inds]
227-
result = inds[0].unique()
228-
other = inds[1].append(inds[2:])
229-
diff = other[result.get_indexer_for(other) == -1]
230-
if len(diff):
231-
result = result.append(diff.unique())
232-
if sort:
233-
result = result.sort_values()
234-
return result
235-
236-
def conv(i):
237-
if isinstance(i, Index):
238-
i = i.tolist()
239-
return i
240-
241-
return Index(
242-
lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
243-
dtype=dtype,
244-
)
245-
246-
def _find_common_index_dtype(inds):
247-
"""
248-
Finds a common type for the indexes to pass through to resulting index.
249-
250-
Parameters
251-
----------
252-
inds: list of Index or list objects
253-
254-
Returns
255-
-------
256-
The common type or None if no indexes were given
257-
"""
258-
dtypes = [idx.dtype for idx in indexes if isinstance(idx, Index)]
259-
if dtypes:
260-
dtype = find_common_type(dtypes)
261-
else:
262-
dtype = None
263-
264-
return dtype
265-
266212
if kind == "special":
267213
result = indexes[0]
268214

@@ -294,18 +240,36 @@ def _find_common_index_dtype(inds):
294240
return result
295241

296242
elif kind == "array":
297-
dtype = _find_common_index_dtype(indexes)
298-
index = indexes[0]
299-
if not all(index.equals(other) for other in indexes[1:]):
300-
index = _unique_indices(indexes, dtype)
243+
if not all_indexes_same(indexes):
244+
dtype = find_common_type([idx.dtype for idx in indexes])
245+
inds = [ind.astype(dtype, copy=False) for ind in indexes]
246+
index = inds[0].unique()
247+
other = inds[1].append(inds[2:])
248+
diff = other[index.get_indexer_for(other) == -1]
249+
if len(diff):
250+
index = index.append(diff.unique())
251+
if sort:
252+
index = index.sort_values()
253+
else:
254+
index = indexes[0]
301255

302256
name = get_unanimous_names(*indexes)[0]
303257
if name != index.name:
304258
index = index.rename(name)
305259
return index
306-
else: # kind='list'
307-
dtype = _find_common_index_dtype(indexes)
308-
return _unique_indices(indexes, dtype)
260+
elif kind == "list":
261+
dtypes = [idx.dtype for idx in indexes if isinstance(idx, Index)]
262+
if dtypes:
263+
dtype = find_common_type(dtypes)
264+
else:
265+
dtype = None
266+
all_lists = (idx.tolist() if isinstance(idx, Index) else idx for idx in indexes)
267+
return Index(
268+
lib.fast_unique_multiple_list_gen(all_lists, sort=bool(sort)),
269+
dtype=dtype,
270+
)
271+
else:
272+
raise ValueError(f"{kind=} must be 'special', 'array' or 'list'.")
309273

310274

311275
def _sanitize_and_check(indexes):
@@ -329,14 +293,14 @@ def _sanitize_and_check(indexes):
329293
sanitized_indexes : list of Index or list objects
330294
type : {'list', 'array', 'special'}
331295
"""
332-
kinds = list({type(index) for index in indexes})
296+
kinds = {type(index) for index in indexes}
333297

334298
if list in kinds:
335299
if len(kinds) > 1:
336300
indexes = [
337301
Index(list(x)) if not isinstance(x, Index) else x for x in indexes
338302
]
339-
kinds.remove(list)
303+
kinds -= {list}
340304
else:
341305
return indexes, "list"
342306

0 commit comments

Comments
 (0)