37
37
_shared_docs = dict ()
38
38
39
39
40
- def cat_core (list_of_columns , sep ):
40
+ def cat_core (all_cols , sep ):
41
41
"""
42
42
Auxiliary function for :meth:`str.cat`
43
43
44
44
Parameters
45
45
----------
46
- list_of_columns : list of numpy arrays
47
- List of arrays to be concatenated with sep;
48
- these arrays may not contain NaNs!
46
+ all_cols : two-dimensional numpy array
47
+ array of columns to be concatenated with sep;
48
+ this array may not contain NaNs!
49
49
sep : string
50
50
The separator string for concatenating the columns
51
51
@@ -54,9 +54,12 @@ def cat_core(list_of_columns, sep):
54
54
np.ndarray
55
55
The concatenation of the columns of all_cols with sep
56
56
"""
57
+ list_of_columns = np .split (all_cols , all_cols .shape [1 ], axis = 1 )
57
58
list_with_sep = [sep ] * (2 * len (list_of_columns ) - 1 )
58
59
list_with_sep [::2 ] = list_of_columns
59
- return np .sum (list_with_sep , axis = 0 )
60
+ # np.split splits into arrays of shape (N, 1); NOT (N,)
61
+ # need to reduce dimensionality of result
62
+ return np .sum (list_with_sep , axis = 0 )[:, 0 ]
60
63
61
64
62
65
def _na_map (f , arr , na_result = np .nan , dtype = object ):
@@ -2239,21 +2242,21 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2239
2242
"'outer'|'inner'|'right'`. The future default will "
2240
2243
"be `join='left'`." , FutureWarning , stacklevel = 2 )
2241
2244
2242
- # if join is None, _get_series_list already aligned indexes
2243
- join = 'left' if join is None else join
2245
+ # concatenate others into DataFrame; need to add keys for uniqueness in
2246
+ # case of duplicate columns (when join is None, all indexes are already
2247
+ # the same after _get_series_list, which forces alignment in this case)
2248
+ others = concat (others , axis = 1 ,
2249
+ join = (join if join == 'inner' else 'outer' ),
2250
+ keys = range (len (others )), copy = False )
2244
2251
2245
2252
# align if required
2246
- if any (not data .index .equals (x .index ) for x in others ):
2247
- # Need to add keys for uniqueness in case of duplicate columns
2248
- others = concat (others , axis = 1 ,
2249
- join = (join if join == 'inner' else 'outer' ),
2250
- keys = range (len (others )), copy = False )
2253
+ if not data .index .equals (others .index ):
2251
2254
data , others = data .align (others , join = join )
2252
- others = [others [x ] for x in others ] # again list of Series
2253
2255
2254
- all_cols = [ensure_object (x ) for x in [data ] + others ]
2255
- na_masks = np .array ([isna (x ) for x in all_cols ])
2256
- union_mask = np .logical_or .reduce (na_masks , axis = 0 )
2256
+ # collect all columns
2257
+ all_cols = ensure_object (concat ([data , others ], axis = 1 , copy = False ))
2258
+ na_masks = isna (all_cols )
2259
+ union_mask = np .logical_or .reduce (na_masks , axis = 1 )
2257
2260
2258
2261
if na_rep is None and union_mask .any ():
2259
2262
# no na_rep means NaNs for all rows where any column has a NaN
@@ -2262,13 +2265,10 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2262
2265
np .putmask (result , union_mask , np .nan )
2263
2266
2264
2267
not_masked = ~ union_mask
2265
- result [not_masked ] = cat_core ([x [not_masked ] for x in all_cols ],
2266
- sep )
2268
+ result [not_masked ] = cat_core (all_cols [not_masked ], sep )
2267
2269
elif na_rep is not None and union_mask .any ():
2268
2270
# fill NaNs with na_rep in case there are actually any NaNs
2269
- all_cols = [np .where (nm , na_rep , col )
2270
- for nm , col in zip (na_masks , all_cols )]
2271
- result = cat_core (all_cols , sep )
2271
+ result = cat_core (np .where (na_masks , na_rep , all_cols ), sep )
2272
2272
else :
2273
2273
# no NaNs - can just concatenate
2274
2274
result = cat_core (all_cols , sep )
0 commit comments