@@ -200,6 +200,62 @@ def contains(cat, key, container):
200
200
return any (loc_ in container for loc_ in loc )
201
201
202
202
203
def create_categorical_dtype(values=None, categories=None, ordered=None,
                             dtype=None):
    """
    Construct or return a :class:`CategoricalDtype`.

    Build the CategoricalDtype from the typical inputs accepted by
    :class:`Categorical`.

    Parameters
    ----------
    values : array-like or Categorical, (1-dimensional), optional
        Only consulted when `dtype` is None: if `values` is categorical,
        its dtype is used as the starting point.
    categories : list-like, optional
        Categories for the CategoricalDtype.
    ordered : bool, optional
        Designates whether the categories are ordered.
    dtype : CategoricalDtype or the string 'category', optional
        A dtype other than the string 'category' cannot be used in
        combination with `categories` or `ordered`. The string 'category'
        is expanded to ``CategoricalDtype(categories, ordered)``.

    Returns
    -------
    CategoricalDtype
        May have ``categories=None`` and ``ordered=None`` when nothing
        was specified.

    Raises
    ------
    ValueError
        If `dtype` is a string other than 'category', or if `dtype` is
        not a string and `categories` or `ordered` is also given.

    Examples
    --------
    >>> create_categorical_dtype()
    CategoricalDtype(categories=None, ordered=None)
    >>> create_categorical_dtype(categories=['a', 'b'], ordered=True)
    CategoricalDtype(categories=['a', 'b'], ordered=True)

    An explicit `dtype` takes precedence over the dtype of `values`:

    >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
    >>> create_categorical_dtype(c, dtype=dtype2)
    CategoricalDtype(categories=['x', 'y'], ordered=False)

    Combining `dtype` with `categories` or `ordered` is an error:

    >>> create_categorical_dtype(c, ['x', 'y'], ordered=True, dtype=dtype2)
    Traceback (most recent call last):
        ...
    ValueError: Cannot specify `categories` or `ordered` together with `dtype`.
    """
    if dtype is not None:
        # The dtype argument takes precedence over values.dtype (if any).
        if isinstance(dtype, compat.string_types):
            if dtype != 'category':
                msg = "Unknown dtype {dtype!r}"
                raise ValueError(msg.format(dtype=dtype))
            # The 'category' alias carries no information of its own, so
            # `categories` and `ordered` are honored here.
            dtype = CategoricalDtype(categories, ordered)
        elif categories is not None or ordered is not None:
            raise ValueError("Cannot specify `categories` or `ordered` "
                             "together with `dtype`.")
    elif values is not None and is_categorical(values):
        # No dtype was passed: start from the dtype of `values`, but honor
        # the `ordered` and `categories` arguments.
        dtype = values.dtype._from_categorical_dtype(values.dtype,
                                                     categories, ordered)
    else:
        # dtype is None and values is missing or not categorical: create a
        # new dtype. Note: this could potentially have categories=None and
        # ordered=None.
        dtype = CategoricalDtype(categories, ordered)

    return dtype
257
+
258
+
203
259
_codes_doc = """\
204
260
The category codes of this categorical.
205
261
@@ -316,50 +372,18 @@ class Categorical(ExtensionArray, PandasObject):
316
372
def __init__ (self , values , categories = None , ordered = None , dtype = None ,
317
373
fastpath = False ):
318
374
319
- # Ways of specifying the dtype (prioritized ordered)
320
- # 1. dtype is a CategoricalDtype
321
- # a.) with known categories, use dtype.categories
322
- # b.) else with Categorical values, use values.dtype
323
- # c.) else, infer from values
324
- # d.) specifying dtype=CategoricalDtype and categories is an error
325
- # 2. dtype is a string 'category'
326
- # a.) use categories, ordered
327
- # b.) use values.dtype
328
- # c.) infer from values
329
- # 3. dtype is None
330
- # a.) use categories, ordered
331
- # b.) use values.dtype
332
- # c.) infer from values
333
- if dtype is not None :
334
- # The dtype argument takes precedence over values.dtype (if any)
335
- if isinstance (dtype , compat .string_types ):
336
- if dtype == 'category' :
337
- dtype = CategoricalDtype (categories , ordered )
338
- else :
339
- msg = "Unknown `dtype` {dtype}"
340
- raise ValueError (msg .format (dtype = dtype ))
341
- elif categories is not None or ordered is not None :
342
- raise ValueError ("Cannot specify both `dtype` and `categories`"
343
- " or `ordered`." )
344
- elif is_categorical (values ):
345
- # If no "dtype" was passed, use the one from "values", but honor
346
- # the "ordered" and "categories" arguments
347
- dtype = values .dtype ._from_categorical_dtype (values .dtype ,
348
- categories , ordered )
375
+ dtype = create_categorical_dtype (values , categories , ordered , dtype )
376
+ # At this point, dtype is always a CategoricalDtype, but
377
+ # we may have dtype.categories be None, and we need to
378
+ # infer categories in a factorization step further below
349
379
380
+ if is_categorical (values ):
350
381
# GH23814, for perf, if values._values already an instance of
351
382
# Categorical, set values to codes, and run fastpath
352
383
if (isinstance (values , (ABCSeries , ABCIndexClass )) and
353
384
isinstance (values ._values , type (self ))):
354
385
values = values ._values .codes .copy ()
355
386
fastpath = True
356
- else :
357
- # If dtype=None and values is not categorical, create a new dtype
358
- dtype = CategoricalDtype (categories , ordered )
359
-
360
- # At this point, dtype is always a CategoricalDtype and you should not
361
- # use categories and ordered seperately.
362
- # if dtype.categories is None, we are inferring
363
387
364
388
if fastpath :
365
389
self ._codes = coerce_indexer_dtype (values , dtype .categories )
@@ -656,6 +680,8 @@ def from_codes(cls, codes, categories, ordered=False):
656
680
categorical. If not given, the resulting categorical will be
657
681
unordered.
658
682
"""
683
+ dtype = create_categorical_dtype (codes , categories , ordered )
684
+
659
685
codes = np .asarray (codes ) # #21767
660
686
if not is_integer_dtype (codes ):
661
687
msg = "codes need to be array-like integers"
@@ -675,14 +701,12 @@ def from_codes(cls, codes, categories, ordered=False):
675
701
raise ValueError (
676
702
"codes need to be convertible to an arrays of integers" )
677
703
678
- categories = CategoricalDtype .validate_categories (categories )
679
-
680
- if len (codes ) and (codes .max () >= len (categories ) or codes .min () < - 1 ):
704
+ if len (codes ) and (
705
+ codes .max () >= len (dtype .categories ) or codes .min () < - 1 ):
681
706
raise ValueError ("codes need to be between -1 and "
682
707
"len(categories)-1" )
683
708
684
- return cls (codes , categories = categories , ordered = ordered ,
685
- fastpath = True )
709
+ return cls (codes , dtype = dtype , fastpath = True )
686
710
687
711
_codes = None
688
712
0 commit comments