3
3
4
4
from pandas .compat import zip
5
5
from pandas .core .dtypes .generic import ABCSeries , ABCIndex
6
- from pandas .core .dtypes .missing import isna , notna
6
+ from pandas .core .dtypes .missing import isna
7
7
from pandas .core .dtypes .common import (
8
8
is_bool_dtype ,
9
9
is_categorical_dtype ,
36
36
_shared_docs = dict ()
37
37
38
38
39
- def _get_array_list (arr , others ):
40
- """
41
- Auxiliary function for :func:`str_cat`
42
-
43
- Parameters
44
- ----------
45
- arr : ndarray
46
- The left-most ndarray of the concatenation
47
- others : list, ndarray, Series
48
- The rest of the content to concatenate. If list of list-likes,
49
- all elements must be passable to ``np.asarray``.
50
-
51
- Returns
52
- -------
53
- list
54
- List of all necessary arrays
55
- """
56
- from pandas .core .series import Series
57
-
58
- if len (others ) and isinstance (com .values_from_object (others )[0 ],
59
- (list , np .ndarray , Series )):
60
- arrays = [arr ] + list (others )
61
- else :
62
- arrays = [arr , others ]
63
-
64
- return [np .asarray (x , dtype = object ) for x in arrays ]
65
-
66
-
67
- def str_cat (arr , others = None , sep = None , na_rep = None ):
68
- """
39
def interleave_sep(all_cols, sep):
    """
    Auxiliary function for :meth:`str.cat`

    Put the separator between every pair of neighbouring columns so that
    the resulting list can be reduced in a single pass.

    Parameters
    ----------
    all_cols : list of numpy arrays
        List of arrays to be concatenated with sep
    sep : string
        The separator string for concatenating the columns

    Returns
    -------
    list
        The list of arrays interleaved with sep; to be fed to np.sum.
        When `sep` is the empty string, `all_cols` itself is returned
        unchanged (same object), since there is nothing to insert.
    """
    if sep == '':
        # no need to add empty strings
        return all_cols

    interleaved = []
    for col in all_cols:
        interleaved.append(col)
        interleaved.append(sep)
    # every column was followed by a separator; drop the trailing one
    # (slicing an empty list is a no-op, so empty input yields [])
    return interleaved[:-1]
147
61
148
62
149
63
def _na_map (f , arr , na_result = np .nan , dtype = object ):
@@ -2172,6 +2086,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2172
2086
2173
2087
if isinstance (others , compat .string_types ):
2174
2088
raise ValueError ("Did you mean to supply a `sep` keyword?" )
2089
+ if sep is None :
2090
+ sep = ''
2175
2091
2176
2092
if isinstance (self ._orig , Index ):
2177
2093
data = Series (self ._orig , index = self ._orig )
@@ -2180,9 +2096,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2180
2096
2181
2097
# concatenate Series/Index with itself if no "others"
2182
2098
if others is None :
2183
- result = str_cat (data , others = others , sep = sep , na_rep = na_rep )
2184
- return self ._wrap_result (result ,
2185
- use_codes = (not self ._is_categorical ))
2099
+ data = data .astype (object ).values
2100
+ mask = isna (data )
2101
+ if mask .any ():
2102
+ if na_rep is None :
2103
+ return sep .join (data [~ mask ])
2104
+ return sep .join (np .where (mask , na_rep , data ))
2105
+ return sep .join (data )
2186
2106
2187
2107
try :
2188
2108
# turn anything in "others" into lists of Series
@@ -2209,23 +2129,42 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2209
2129
"'outer'|'inner'|'right'`. The future default will "
2210
2130
"be `join='left'`." , FutureWarning , stacklevel = 2 )
2211
2131
2212
- # align if required
2213
- if join is not None :
2132
+ # if join is None, _get_series_list already aligned indexes
2133
+ join = 'left' if join is None else join
2134
+
2135
+ if any (not data .index .equals (x .index ) for x in others ):
2214
2136
# Need to add keys for uniqueness in case of duplicate columns
2215
2137
others = concat (others , axis = 1 ,
2216
2138
join = (join if join == 'inner' else 'outer' ),
2217
- keys = range (len (others )))
2139
+ keys = range (len (others )), copy = False )
2218
2140
data , others = data .align (others , join = join )
2219
2141
others = [others [x ] for x in others ] # again list of Series
2220
2142
2221
- # str_cat discards index
2222
- res = str_cat (data , others = others , sep = sep , na_rep = na_rep )
2143
+ all_cols = [x .astype (object ).values for x in [data ] + others ]
2144
+ masks = np .array ([isna (x ) for x in all_cols ])
2145
+ union_mask = np .logical_or .reduce (masks , axis = 0 )
2146
+
2147
+ if na_rep is None and union_mask .any ():
2148
+ result = np .empty (len (data ), dtype = object )
2149
+ np .putmask (result , union_mask , np .nan )
2150
+
2151
+ not_masked = ~ union_mask
2152
+ all_cols = interleave_sep ([x [not_masked ] for x in all_cols ], sep )
2153
+
2154
+ result [not_masked ] = np .sum (all_cols , axis = 0 )
2155
+ elif na_rep is not None and union_mask .any ():
2156
+ # fill NaNs
2157
+ all_cols = [np .where (masks [i ], na_rep , all_cols [i ])
2158
+ for i in range (len (all_cols ))]
2159
+ result = np .sum (interleave_sep (all_cols , sep ), axis = 0 )
2160
+ else : # no NaNs
2161
+ result = np .sum (interleave_sep (all_cols , sep ), axis = 0 )
2223
2162
2224
2163
if isinstance (self ._orig , Index ):
2225
- res = Index (res , name = self ._orig .name )
2164
+ result = Index (result , name = self ._orig .name )
2226
2165
else : # Series
2227
- res = Series (res , index = data .index , name = self ._orig .name )
2228
- return res
2166
+ result = Series (result , index = data .index , name = self ._orig .name )
2167
+ return result
2229
2168
2230
2169
_shared_docs ['str_split' ] = ("""
2231
2170
Split strings around given separator/delimiter.
0 commit comments