Skip to content

REF: define concat classmethods in the appropriate places #27727

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 4 additions & 79 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCCategoricalIndex,
ABCDatetimeArray,
ABCDatetimeIndex,
ABCIndexClass,
ABCPeriodIndex,
ABCRangeIndex,
ABCTimedeltaIndex,
ABCSeries,
)


Expand Down Expand Up @@ -285,14 +284,14 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):
[b, c, a, b]
Categories (3, object): [b, c, a]
"""
from pandas import Index, Categorical, CategoricalIndex, Series
from pandas import Index, Categorical
from pandas.core.arrays.categorical import _recode_for_categories

if len(to_union) == 0:
raise ValueError("No Categoricals to union")

def _maybe_unwrap(x):
if isinstance(x, (CategoricalIndex, Series)):
if isinstance(x, (ABCCategoricalIndex, ABCSeries)):
return x.values
elif isinstance(x, Categorical):
return x
Expand Down Expand Up @@ -450,31 +449,6 @@ def _concat_datetimetz(to_concat, name=None):
return sample._concat_same_type(to_concat)


def _concat_index_same_dtype(indexes, klass=None):
klass = klass if klass is not None else indexes[0].__class__
return klass(np.concatenate([x._values for x in indexes]))


def _concat_index_asobject(to_concat, name=None):
"""
concat all inputs as object. DatetimeIndex, TimedeltaIndex and
PeriodIndex are converted to object dtype before concatenation
"""
from pandas import Index
from pandas.core.arrays import ExtensionArray

klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray)
to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat]

self = to_concat[0]
attribs = self._get_attributes_dict()
attribs["name"] = name

to_concat = [x._values if isinstance(x, Index) else x for x in to_concat]

return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)


def _concat_sparse(to_concat, axis=0, typs=None):
"""
provide concatenation of an sparse/dense array of arrays each of which is a
Expand Down Expand Up @@ -505,52 +479,3 @@ def _concat_sparse(to_concat, axis=0, typs=None):
]

return SparseArray._concat_same_type(to_concat)


def _concat_rangeindex_same_dtype(indexes):
"""
Concatenates multiple RangeIndex instances. All members of "indexes" must
be of type RangeIndex; result will be RangeIndex if possible, Int64Index
otherwise. E.g.:
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
"""
from pandas import Int64Index, RangeIndex

start = step = next_ = None

# Filter the empty indexes
non_empty_indexes = [obj for obj in indexes if len(obj)]

for obj in non_empty_indexes:
rng = obj._range # type: range

if start is None:
# This is set by the first non-empty index
start = rng.start
if step is None and len(rng) > 1:
step = rng.step
elif step is None:
# First non-empty index had only one element
if rng.start == start:
return _concat_index_same_dtype(indexes, klass=Int64Index)
step = rng.start - start

non_consecutive = (step != rng.step and len(rng) > 1) or (
next_ is not None and rng.start != next_
)
if non_consecutive:
return _concat_index_same_dtype(indexes, klass=Int64Index)

if step is not None:
next_ = rng[-1] + step

if non_empty_indexes:
# Get the stop value from "next" or alternatively
# from the last non-empty index
stop = non_empty_indexes[-1].stop if next_ is None else next_
return RangeIndex(start, stop, step)

# Here all "indexes" had 0 length, i.e. were empty.
# In this case return an empty range index.
return RangeIndex(0, 0)
16 changes: 14 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
ABCDataFrame,
ABCDateOffset,
ABCDatetimeArray,
ABCDatetimeIndex,
ABCIndexClass,
ABCMultiIndex,
ABCPandasArray,
Expand Down Expand Up @@ -4309,14 +4310,25 @@ def _concat(self, to_concat, name):

if len(typs) == 1:
return self._concat_same_dtype(to_concat, name=name)
return _concat._concat_index_asobject(to_concat, name=name)
return Index._concat_same_dtype(self, to_concat, name=name)

def _concat_same_dtype(self, to_concat, name):
"""
Concatenate to_concat which has the same class.
"""
# must be overridden in specific classes
return _concat._concat_index_asobject(to_concat, name)
klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should there be references to specific Index types in pandas/core/indexes/base.py?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've already got a few; in this case Index is serving less as a base class and more as an ObjectIndex.

to_concat = [
x.astype(object) if isinstance(x, klasses) else x for x in to_concat
]

self = to_concat[0]
attribs = self._get_attributes_dict()
attribs["name"] = name

to_concat = [x._values if isinstance(x, Index) else x for x in to_concat]

return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)

def putmask(self, mask, value):
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
needs_i8_conversion,
pandas_dtype,
)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import (
ABCFloat64Index,
ABCInt64Index,
Expand Down Expand Up @@ -129,7 +128,8 @@ def _assert_safe_casting(cls, data, subarr):
pass

def _concat_same_dtype(self, indexes, name):
return _concat._concat_index_same_dtype(indexes).rename(name)
result = type(indexes[0])(np.concatenate([x._values for x in indexes]))
return result.rename(name)

@property
def is_all_dates(self):
Expand Down
49 changes: 47 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes import concat as _concat
from pandas.core.dtypes.common import (
ensure_platform_int,
ensure_python_int,
Expand Down Expand Up @@ -647,7 +646,53 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
return super().join(other, how, level, return_indexers, sort)

def _concat_same_dtype(self, indexes, name):
return _concat._concat_rangeindex_same_dtype(indexes).rename(name)
"""
Concatenates multiple RangeIndex instances. All members of "indexes" must
be of type RangeIndex; result will be RangeIndex if possible, Int64Index
otherwise. E.g.:
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
"""
start = step = next_ = None

# Filter the empty indexes
non_empty_indexes = [obj for obj in indexes if len(obj)]

for obj in non_empty_indexes:
rng = obj._range # type: range

if start is None:
# This is set by the first non-empty index
start = rng.start
if step is None and len(rng) > 1:
step = rng.step
elif step is None:
# First non-empty index had only one element
if rng.start == start:
result = Int64Index(np.concatenate([x._values for x in indexes]))
return result.rename(name)

step = rng.start - start

non_consecutive = (step != rng.step and len(rng) > 1) or (
next_ is not None and rng.start != next_
)
if non_consecutive:
result = Int64Index(np.concatenate([x._values for x in indexes]))
return result.rename(name)

if step is not None:
next_ = rng[-1] + step

if non_empty_indexes:
# Get the stop value from "next" or alternatively
# from the last non-empty index
stop = non_empty_indexes[-1].stop if next_ is None else next_
return RangeIndex(start, stop, step).rename(name)

# Here all "indexes" had 0 length, i.e. were empty.
# In this case return an empty range index.
return RangeIndex(0, 0).rename(name)

def __len__(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def test_append(self):
tm.assert_index_equal(result, expected, exact=True)

def test_append_to_another(self):
# hits _concat_index_asobject
# hits Index._concat_same_dtype
fst = Index(["a", "b"])
snd = CategoricalIndex(["d", "e"])
result = fst.append(snd)
Expand Down