2
2
import datetime
3
3
import warnings
4
4
import operator
5
+
5
6
from functools import partial
6
7
from pandas .compat import range , zip , lrange , lzip , u , reduce , filter , map
7
8
from pandas import compat
16
17
from pandas .core .base import PandasObject , FrozenList , FrozenNDArray , IndexOpsMixin , _shared_docs
17
18
from pandas .util .decorators import (Appender , Substitution , cache_readonly ,
18
19
deprecate )
19
- from pandas .core .common import isnull , array_equivalent
20
20
import pandas .core .common as com
21
- from pandas .core .common import (_values_from_object , is_float , is_integer ,
21
+ from pandas .core .common import (isnull , array_equivalent ,
22
+ _values_from_object , is_float , is_integer , is_categorical_dtype ,
22
23
ABCSeries , _ensure_object , _ensure_int64 , is_bool_indexer ,
23
24
is_list_like , is_bool_dtype , is_null_slice , is_integer_dtype )
24
25
from pandas .core .config import get_option
@@ -163,6 +164,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
163
164
return Float64Index (data , copy = copy , dtype = dtype , name = name )
164
165
elif issubclass (data .dtype .type , np .bool ) or is_bool_dtype (data ):
165
166
subarr = data .astype ('object' )
167
+ elif is_categorical_dtype (data ):
168
+ return CategoricalIndex (data , copy = copy , name = name , ** kwargs )
166
169
else :
167
170
subarr = com ._asarray_tuplesafe (data , dtype = object )
168
171
@@ -171,6 +174,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
171
174
if copy :
172
175
subarr = subarr .copy ()
173
176
177
+ elif is_categorical_dtype (data ):
178
+ return CategoricalIndex (data , copy = copy , name = name , ** kwargs )
174
179
elif hasattr (data , '__array__' ):
175
180
return Index (np .asarray (data ), dtype = dtype , copy = copy , name = name ,
176
181
** kwargs )
@@ -626,6 +631,9 @@ def is_numeric(self):
626
631
def is_object(self):
    """Return whether this index's dtype compares equal to ``np.object_``."""
    object_dtype = np.object_
    return self.dtype == object_dtype
628
633
634
def is_categorical(self):
    """Return whether ``inferred_type`` is exactly ``'categorical'``."""
    kind = self.inferred_type
    return kind == 'categorical'
636
+
629
637
def is_mixed(self):
    """Return whether ``'mixed'`` occurs within ``inferred_type``."""
    kind = self.inferred_type
    return 'mixed' in kind
631
639
@@ -1045,10 +1053,12 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs):
1045
1053
1046
1054
from pandas .core .format import format_array
1047
1055
1048
- if values .dtype == np .object_ :
1056
+ if com .is_categorical_dtype (values .dtype ):
1057
+ values = np .array (values )
1058
+ elif com .is_object_dtype (values .dtype ):
1049
1059
values = lib .maybe_convert_objects (values , safe = 1 )
1050
1060
1051
- if values .dtype == np . object_ :
1061
+ if com . is_object_dtype ( values .dtype ) :
1052
1062
result = [com .pprint_thing (x , escape_chars = ('\t ' , '\r ' , '\n ' ))
1053
1063
for x in values ]
1054
1064
@@ -1543,7 +1553,7 @@ def get_indexer(self, target, method=None, limit=None):
1543
1553
if pself is not self or ptarget is not target :
1544
1554
return pself .get_indexer (ptarget , method = method , limit = limit )
1545
1555
1546
- if self .dtype != target .dtype :
1556
+ if not com . is_dtypes_equal ( self .dtype , target .dtype ) :
1547
1557
this = self .astype (object )
1548
1558
target = target .astype (object )
1549
1559
return this .get_indexer (target , method = method , limit = limit )
@@ -2511,6 +2521,206 @@ def invalid_op(self, other=None):
2511
2521
Index ._add_numeric_methods_disabled ()
2512
2522
Index ._add_logical_methods ()
2513
2523
2524
class CategoricalIndex(Index):
    """
    Immutable Index implementing an ordered, sliceable set. CategoricalIndex
    represents a sparsely populated Index with an underlying Categorical.

    Parameters
    ----------
    data : array-like (1-dimensional)
        A Categorical, a CategoricalIndex, or anything accepted by the
        Categorical constructor.
    categories : optional categories for the CategoricalIndex
    dtype : object
        Accepted in the signature but not used by the constructor.
    copy : bool
        Make a copy of the underlying Categorical
    name : object
        Name to be stored in the index
    fastpath : bool
        If True, hand ``data`` and ``name`` straight to ``_simple_new``
        and skip all other processing.
    ordered : None
        Must be left as None; any other value raises ValueError because
        the underlying Categorical is always made ordered.

    """

    _typ = 'categoricalindex'
    _engine_type = _index.Int64Engine
    _attributes = ['name', 'categories']

    def __new__(cls, data=None, categories=None, dtype=None, copy=False,
                name=None, fastpath=False, ordered=None, **kwargs):

        def create_categorical(data=data, categories=categories):
            # apply the requested categories (if any) and force ordering
            if categories is not None:
                data = data.set_categories(categories)
            if not data.ordered:
                data = data.as_ordered()
            return data

        if fastpath:
            return cls._simple_new(data, name=name)

        if ordered is not None:
            raise ValueError("CategoricalIndex are by definition ordered")

        if isinstance(data, com.ABCCategorical):
            data = create_categorical(data, categories)
        elif data is None or np.isscalar(data):
            # NOTE(review): presumably raises for scalar/None input --
            # confirm against Index._scalar_data_error
            cls._scalar_data_error(data)
        elif isinstance(data, CategoricalIndex):
            # unwrap to the underlying Categorical first
            data = data._data
            data = create_categorical(data, categories)
        else:
            from pandas.core.categorical import Categorical
            data = Categorical(data, categories=categories, ordered=True)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name)

    @classmethod
    def _simple_new(cls, values, name=None, categories=None, **kwargs):
        """
        Build an instance around ``values`` without the checks in __new__.

        Non-Categorical ``values`` are wrapped in an ordered Categorical;
        an existing Categorical only has ``categories`` applied when given.
        Extra keyword arguments are set as attributes on the result.
        """
        result = object.__new__(cls)

        if not isinstance(values, com.ABCCategorical):
            from pandas.core.categorical import Categorical
            values = Categorical(values, categories=categories, ordered=True)
        elif categories is not None:
            values = values.set_categories(categories)

        result._data = values
        result.name = name
        for k, v in compat.iteritems(kwargs):
            setattr(result, k, v)

        result._reset_identity()
        return result

    def equals(self, other):
        """
        Determines if two CategoricalIndex objects contain the same elements.

        Returns False when ``other`` is not a CategoricalIndex or when the
        element-wise comparison of the underlying data raises.
        """
        if self.is_(other):
            return True

        if not isinstance(other, CategoricalIndex):
            return False

        try:
            return (self._data == other._data).all()
        except Exception:
            # a failed comparison means "not equal"; unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate
            return False

    @property
    def inferred_type(self):
        return 'categorical'

    @property
    def values(self):
        """ return the underlying data, which is a Categorical """
        return self._data

    @property
    def codes(self):
        """ return the codes of the underlying Categorical """
        return self._data.codes

    @property
    def categories(self):
        """ return the categories of the underlying Categorical """
        return self._data.categories

    def __contains__(self, key):
        # hash() raises TypeError for unhashable keys before the lookup;
        # membership is tested against the categories, not the values
        hash(key)
        return key in self.categories

    def __array__(self, result=None):
        """ the array interface, return my values """
        return np.array(self._data)

    def _array_values(self):
        return self.values

    def argsort(self, *args, **kwargs):
        """ delegate to the argsort of the underlying Categorical """
        return self.values.argsort(*args, **kwargs)

    @cache_readonly
    def _engine(self):

        # we are going to look things up with the codes themselves
        return self._engine_type(lambda: self.codes.astype('i8'), len(self))

    @cache_readonly
    def is_unique(self):
        return not self.duplicated().any()

    # no docstring here: the decorator supplies the shared one
    @Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
    def duplicated(self, take_last=False):
        from pandas.hashtable import duplicated_int64
        return duplicated_int64(self.codes.astype('i8'), take_last)

    def get_loc(self, key, method=None):
        """
        Get integer location for requested label

        Parameters
        ----------
        key : label
        method : {None}
            * default: exact matches only.
            Currently not used by this implementation.

        Returns
        -------
        loc : the positions found by the engine's non-unique lookup of the
            key's category code

        Raises
        ------
        KeyError
            If ``key`` is not one of the categories.
        """
        tcodes = self.categories.get_indexer([key])
        if tcodes[0] == -1:
            # include the key so the error message is informative
            raise KeyError(key)
        return self._engine.get_indexer_non_unique(tcodes)[0]

    def get_indexer(self, target, method=None, limit=None):
        """
        Compute indexer for new index given the current index. The
        indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : Index or array-like
        method : {None}
            Only exact matching is implemented; 'pad' / 'ffill',
            'backfill' / 'bfill' and 'nearest' raise NotImplementedError.
        limit : object
            Currently not used by this implementation.

        Notes
        -----
        This is a low-level method and probably should be used at your own risk

        Examples
        --------
        >>> indexer = index.get_indexer(new_index)
        >>> new_values = cur_values.take(indexer)

        Returns
        -------
        indexer : ndarray
        """
        method = com._clean_reindex_fill_method(method)
        target = _ensure_index(target)

        if isinstance(target, CategoricalIndex):
            # NOTE(review): this looks up the target's categories rather
            # than its values -- confirm this is intended
            target = target.categories

        if method == 'pad' or method == 'backfill':
            raise NotImplementedError("method='pad' and method='backfill' not implemented yet "
                                      'for CategoricalIndex')
        if method == 'nearest':
            raise NotImplementedError("method='nearest' not implemented yet "
                                      'for CategoricalIndex')

        # translate labels to category codes, then locate those codes
        tcodes = self.categories.get_indexer(target)
        indexer = self._engine.get_indexer_non_unique(tcodes)[0]

        return com._ensure_platform_int(indexer)


CategoricalIndex._add_numeric_methods_disabled()
CategoricalIndex._add_logical_methods_disabled()
2514
2724
2515
2725
class NumericIndex (Index ):
2516
2726
"""
0 commit comments