Skip to content

Commit 36c6240

Browse files
committed
Review (WillAyd)
1 parent 0d3c6d2 commit 36c6240

File tree

1 file changed

+21
-21
lines changed

1 file changed

+21
-21
lines changed

pandas/core/strings.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,15 @@
3737
_shared_docs = dict()
3838

3939

40-
def cat_core(list_of_columns, sep):
40+
def cat_core(all_cols, sep):
4141
"""
4242
Auxiliary function for :meth:`str.cat`
4343
4444
Parameters
4545
----------
46-
list_of_columns : list of numpy arrays
47-
List of arrays to be concatenated with sep;
48-
these arrays may not contain NaNs!
46+
all_cols : two-dimensional numpy array
47+
array of columns to be concatenated with sep;
48+
this array may not contain NaNs!
4949
sep : string
5050
The separator string for concatenating the columns
5151
@@ -54,9 +54,12 @@ def cat_core(list_of_columns, sep):
5454
np.ndarray
5555
The concatenation of list_of_columns with sep
5656
"""
57+
list_of_columns = np.split(all_cols, all_cols.shape[1], axis=1)
5758
list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
5859
list_with_sep[::2] = list_of_columns
59-
return np.sum(list_with_sep, axis=0)
60+
# np.split splits into arrays of shape (N, 1); NOT (N,)
61+
# need to reduce dimensionality of result
62+
return np.sum(list_with_sep, axis=0)[:, 0]
6063

6164

6265
def _na_map(f, arr, na_result=np.nan, dtype=object):
@@ -2239,21 +2242,21 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22392242
"'outer'|'inner'|'right'`. The future default will "
22402243
"be `join='left'`.", FutureWarning, stacklevel=2)
22412244

2242-
# if join is None, _get_series_list already aligned indexes
2243-
join = 'left' if join is None else join
2245+
# concatenate others into DataFrame; need to add keys for uniqueness in
2246+
# case of duplicate columns (if join is None, all indexes are already
2247+
# the same after _get_series_list, which forces alignment in this case)
2248+
others = concat(others, axis=1,
2249+
join=(join if join == 'inner' else 'outer'),
2250+
keys=range(len(others)), copy=False)
22442251

22452252
# align if required
2246-
if any(not data.index.equals(x.index) for x in others):
2247-
# Need to add keys for uniqueness in case of duplicate columns
2248-
others = concat(others, axis=1,
2249-
join=(join if join == 'inner' else 'outer'),
2250-
keys=range(len(others)), copy=False)
2253+
if not data.index.equals(others.index):
22512254
data, others = data.align(others, join=join)
2252-
others = [others[x] for x in others] # again list of Series
22532255

2254-
all_cols = [ensure_object(x) for x in [data] + others]
2255-
na_masks = np.array([isna(x) for x in all_cols])
2256-
union_mask = np.logical_or.reduce(na_masks, axis=0)
2256+
# collect all columns
2257+
all_cols = ensure_object(concat([data, others], axis=1, copy=False))
2258+
na_masks = isna(all_cols)
2259+
union_mask = np.logical_or.reduce(na_masks, axis=1)
22572260

22582261
if na_rep is None and union_mask.any():
22592262
# no na_rep means NaNs for all rows where any column has a NaN
@@ -2262,13 +2265,10 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22622265
np.putmask(result, union_mask, np.nan)
22632266

22642267
not_masked = ~union_mask
2265-
result[not_masked] = cat_core([x[not_masked] for x in all_cols],
2266-
sep)
2268+
result[not_masked] = cat_core(all_cols[not_masked], sep)
22672269
elif na_rep is not None and union_mask.any():
22682270
# fill NaNs with na_rep in case there are actually any NaNs
2269-
all_cols = [np.where(nm, na_rep, col)
2270-
for nm, col in zip(na_masks, all_cols)]
2271-
result = cat_core(all_cols, sep)
2271+
result = cat_core(np.where(na_masks, na_rep, all_cols), sep)
22722272
else:
22732273
# no NaNs - can just concatenate
22742274
result = cat_core(all_cols, sep)

0 commit comments

Comments
 (0)