27
27
algos as libalgos ,
28
28
hashtable as htable ,
29
29
)
30
+ from pandas ._libs .arrays import NDArrayBacked
30
31
from pandas ._libs .lib import no_default
31
32
from pandas ._typing import (
32
33
ArrayLike ,
@@ -349,12 +350,13 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
349
350
# For comparisons, so that numpy uses our implementation if the compare
350
351
# ops, which raise
351
352
__array_priority__ = 1000
352
- _dtype = CategoricalDtype (ordered = False )
353
353
# tolist is not actually deprecated, just suppressed in the __dir__
354
354
_hidden_attrs = PandasObject ._hidden_attrs | frozenset (["tolist" ])
355
355
_typ = "categorical"
356
356
_can_hold_na = True
357
357
358
+ _dtype : CategoricalDtype
359
+
358
360
def __init__ (
359
361
self ,
360
362
values ,
@@ -373,8 +375,9 @@ def __init__(
373
375
# infer categories in a factorization step further below
374
376
375
377
if fastpath :
376
- self ._ndarray = coerce_indexer_dtype (values , dtype .categories )
377
- self ._dtype = self ._dtype .update_dtype (dtype )
378
+ codes = coerce_indexer_dtype (values , dtype .categories )
379
+ dtype = CategoricalDtype (ordered = False ).update_dtype (dtype )
380
+ super ().__init__ (codes , dtype )
378
381
return
379
382
380
383
if not is_list_like (values ):
@@ -463,8 +466,11 @@ def __init__(
463
466
full_codes [~ null_mask ] = codes
464
467
codes = full_codes
465
468
466
- self ._dtype = self ._dtype .update_dtype (dtype )
467
- self ._ndarray = coerce_indexer_dtype (codes , dtype .categories )
469
+ dtype = CategoricalDtype (ordered = False ).update_dtype (dtype )
470
+ arr = coerce_indexer_dtype (codes , dtype .categories )
471
+ # error: Argument 1 to "__init__" of "NDArrayBacked" has incompatible
472
+ # type "Union[ExtensionArray, ndarray]"; expected "ndarray"
473
+ super ().__init__ (arr , dtype ) # type: ignore[arg-type]
468
474
469
475
@property
470
476
def dtype (self ) -> CategoricalDtype :
@@ -513,9 +519,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
513
519
raise ValueError ("Cannot convert float NaN to integer" )
514
520
515
521
elif len (self .codes ) == 0 or len (self .categories ) == 0 :
516
- # error: Incompatible types in assignment (expression has type "ndarray",
517
- # variable has type "Categorical")
518
- result = np .array ( # type: ignore[assignment]
522
+ result = np .array (
519
523
self ,
520
524
dtype = dtype ,
521
525
copy = copy ,
@@ -533,11 +537,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
533
537
msg = f"Cannot cast { self .categories .dtype } dtype to { dtype } "
534
538
raise ValueError (msg )
535
539
536
- # error: Incompatible types in assignment (expression has type "ndarray",
537
- # variable has type "Categorical")
538
- result = take_nd ( # type: ignore[assignment]
539
- new_cats , ensure_platform_int (self ._codes )
540
- )
540
+ result = take_nd (new_cats , ensure_platform_int (self ._codes ))
541
541
542
542
return result
543
543
@@ -745,7 +745,7 @@ def categories(self, categories):
745
745
"new categories need to have the same number of "
746
746
"items as the old categories!"
747
747
)
748
- self ._dtype = new_dtype
748
+ super (). __init__ ( self ._ndarray , new_dtype )
749
749
750
750
@property
751
751
def ordered (self ) -> Ordered :
@@ -809,7 +809,7 @@ def _set_categories(self, categories, fastpath=False):
809
809
"items than the old categories!"
810
810
)
811
811
812
- self ._dtype = new_dtype
812
+ super (). __init__ ( self ._ndarray , new_dtype )
813
813
814
814
def _set_dtype (self , dtype : CategoricalDtype ) -> Categorical :
815
815
"""
@@ -842,7 +842,7 @@ def set_ordered(self, value, inplace=False):
842
842
inplace = validate_bool_kwarg (inplace , "inplace" )
843
843
new_dtype = CategoricalDtype (self .categories , ordered = value )
844
844
cat = self if inplace else self .copy ()
845
- cat . _dtype = new_dtype
845
+ NDArrayBacked . __init__ ( cat , cat . _ndarray , new_dtype )
846
846
if not inplace :
847
847
return cat
848
848
@@ -961,12 +961,12 @@ def set_categories(
961
961
):
962
962
# remove all _codes which are larger and set to -1/NaN
963
963
cat ._codes [cat ._codes >= len (new_dtype .categories )] = - 1
964
+ codes = cat ._codes
964
965
else :
965
966
codes = recode_for_categories (
966
967
cat .codes , cat .categories , new_dtype .categories
967
968
)
968
- cat ._ndarray = codes
969
- cat ._dtype = new_dtype
969
+ NDArrayBacked .__init__ (cat , codes , new_dtype )
970
970
971
971
if not inplace :
972
972
return cat
@@ -1182,8 +1182,8 @@ def add_categories(self, new_categories, inplace=no_default):
1182
1182
new_dtype = CategoricalDtype (new_categories , self .ordered )
1183
1183
1184
1184
cat = self if inplace else self .copy ()
1185
- cat . _dtype = new_dtype
1186
- cat . _ndarray = coerce_indexer_dtype (cat . _ndarray , new_dtype . categories )
1185
+ codes = coerce_indexer_dtype ( cat . _ndarray , new_dtype . categories )
1186
+ NDArrayBacked . __init__ (cat , codes , new_dtype )
1187
1187
if not inplace :
1188
1188
return cat
1189
1189
@@ -1303,9 +1303,8 @@ def remove_unused_categories(self, inplace=no_default):
1303
1303
new_dtype = CategoricalDtype ._from_fastpath (
1304
1304
new_categories , ordered = self .ordered
1305
1305
)
1306
- cat ._dtype = new_dtype
1307
- cat ._ndarray = coerce_indexer_dtype (inv , new_dtype .categories )
1308
-
1306
+ new_codes = coerce_indexer_dtype (inv , new_dtype .categories )
1307
+ NDArrayBacked .__init__ (cat , new_codes , new_dtype )
1309
1308
if not inplace :
1310
1309
return cat
1311
1310
@@ -1484,7 +1483,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
1484
1483
def __setstate__ (self , state ):
1485
1484
"""Necessary for making this object picklable"""
1486
1485
if not isinstance (state , dict ):
1487
- raise Exception ( "invalid pickle state" )
1486
+ return super (). __setstate__ ( state )
1488
1487
1489
1488
if "_dtype" not in state :
1490
1489
state ["_dtype" ] = CategoricalDtype (state ["_categories" ], state ["_ordered" ])
@@ -1493,8 +1492,7 @@ def __setstate__(self, state):
1493
1492
# backward compat, changed what is property vs attribute
1494
1493
state ["_ndarray" ] = state .pop ("_codes" )
1495
1494
1496
- for k , v in state .items ():
1497
- setattr (self , k , v )
1495
+ super ().__setstate__ (state )
1498
1496
1499
1497
@property
1500
1498
def nbytes (self ) -> int :
@@ -1863,16 +1861,7 @@ def _codes(self) -> np.ndarray:
1863
1861
1864
1862
@_codes .setter
1865
1863
def _codes (self , value : np .ndarray ):
1866
- self ._ndarray = value
1867
-
1868
- def _from_backing_data (self , arr : np .ndarray ) -> Categorical :
1869
- assert isinstance (arr , np .ndarray )
1870
- assert arr .dtype == self ._ndarray .dtype
1871
-
1872
- res = object .__new__ (type (self ))
1873
- res ._ndarray = arr
1874
- res ._dtype = self .dtype
1875
- return res
1864
+ NDArrayBacked .__init__ (self , value , self .dtype )
1876
1865
1877
1866
def _box_func (self , i : int ):
1878
1867
if i == - 1 :
0 commit comments