pandas-dev
diff --git a/‎pandas/core/api.py
Lines changed: 1 addition & 1 deletion b/‎pandas/core/api.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/categorical.py
Lines changed: 22 additions & 2 deletions b/‎pandas/core/categorical.py
Lines changed: 22 additions & 2 deletions
diff --git a/‎pandas/core/common.py
Lines changed: 18 additions & 0 deletions b/‎pandas/core/common.py
Lines changed: 18 additions & 0 deletions
diff --git a/‎pandas/core/index.py
Lines changed: 215 additions & 5 deletions b/‎pandas/core/index.py
Lines changed: 215 additions & 5 deletions
diff --git a/‎pandas/core/series.py
Lines changed: 2 additions & 1 deletion b/‎pandas/core/series.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎pandas/tests/test_categorical.py
Lines changed: 8 additions & 0 deletions b/‎pandas/tests/test_categorical.py
Lines changed: 8 additions & 0 deletions
@@ -8,7 +8,7 @@
 from pandas.core.categorical import Categorical
 from pandas.core.groupby import Grouper
 from pandas.core.format import set_eng_float_format
-from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex
+from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex
 
 from pandas.core.series import Series, TimeSeries
 from pandas.core.frame import DataFrame
 
@@ -14,7 +14,7 @@
 import pandas.core.common as com
 from pandas.util.decorators import cache_readonly
 
-from pandas.core.common import (CategoricalDtype, ABCSeries, isnull, notnull,
+from pandas.core.common import (CategoricalDtype, ABCSeries, ABCCategoricalIndex, isnull, notnull,
                                 is_categorical_dtype, is_integer_dtype, is_object_dtype,
                                 _possibly_infer_to_datetimelike, get_dtype_kinds,
                                 is_list_like, is_sequence, is_null_slice, is_bool,
@@ -79,7 +79,7 @@ def f(self, other):
 
 def maybe_to_categorical(array):
     """ coerce to a categorical if a series is given """
-    if isinstance(array, ABCSeries):
+    if isinstance(array, (ABCSeries, ABCCategoricalIndex)):
         return array.values
     return array
 
@@ -302,11 +302,31 @@ def copy(self):
         return Categorical(values=self._codes.copy(),categories=self.categories,
                            name=self.name, ordered=self.ordered, fastpath=True)
 
+    def astype(self, dtype):
+        """ convert to ``dtype``; only categorical and object are supported """
+        if is_categorical_dtype(dtype):
+            # already categorical: hand back this same object, no copy
+            return self
+        if is_object_dtype(dtype):
+            return np.array(self)
+        raise TypeError('Astype a Categorical to anything other than '
+                        'categorical or object is not supported')
+
     @cache_readonly
     def ndim(self):
         """Number of dimensions of the Categorical """
         return self._codes.ndim
 
+    @cache_readonly
+    def size(self):
+        """ return the number of elements, i.e. len(self) """
+        return len(self)
+
+    @cache_readonly
+    def itemsize(self):
+        """ return the itemsize reported by the categories (self.categories.itemsize) """
+        return self.categories.itemsize
+
     def reshape(self, new_shape, **kwargs):
         """ compat with .reshape """
         return self
 
@@ -72,6 +72,7 @@ def _check(cls, inst):
 ABCDatetimeIndex = create_pandas_abc_type("ABCDatetimeIndex", "_typ", ("datetimeindex",))
 ABCTimedeltaIndex = create_pandas_abc_type("ABCTimedeltaIndex", "_typ", ("timedeltaindex",))
 ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",))
+ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",))
 ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
 ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
 ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",))
@@ -2438,9 +2439,26 @@ def _get_dtype_type(arr_or_dtype):
         return np.dtype(arr_or_dtype).type
     elif isinstance(arr_or_dtype, CategoricalDtype):
         return CategoricalDtypeType
+    elif isinstance(arr_or_dtype, compat.string_types):
+        if is_categorical_dtype(arr_or_dtype):
+            return CategoricalDtypeType
+        return _get_dtype_type(np.dtype(arr_or_dtype))
     return arr_or_dtype.dtype.type
 
 
+def is_dtypes_equal(source, target):
+    """ return True if both arguments resolve to the same dtype type, else False """
+    source = _get_dtype_type(source)
+    target = _get_dtype_type(target)
+
+    try:
+        return source == target
+    except Exception:
+
+        # invalid comparison (KeyboardInterrupt / SystemExit still propagate)
+        # object == category will hit this
+        return False
+
 def is_any_int_dtype(arr_or_dtype):
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.integer)
 
@@ -2,6 +2,7 @@
 import datetime
 import warnings
 import operator
+
 from functools import partial
 from pandas.compat import range, zip, lrange, lzip, u, reduce, filter, map
 from pandas import compat
@@ -16,9 +17,9 @@
 from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, _shared_docs
 from pandas.util.decorators import (Appender, Substitution, cache_readonly,
                                     deprecate)
-from pandas.core.common import isnull, array_equivalent
 import pandas.core.common as com
-from pandas.core.common import (_values_from_object, is_float, is_integer,
+from pandas.core.common import (isnull, array_equivalent,
+                                _values_from_object, is_float, is_integer, is_categorical_dtype,
                                 ABCSeries, _ensure_object, _ensure_int64, is_bool_indexer,
                                 is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype)
 from pandas.core.config import get_option
@@ -163,6 +164,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
                 return Float64Index(data, copy=copy, dtype=dtype, name=name)
             elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data):
                 subarr = data.astype('object')
+            elif is_categorical_dtype(data):
+                return CategoricalIndex(data, copy=copy, name=name, **kwargs)
             else:
                 subarr = com._asarray_tuplesafe(data, dtype=object)
 
@@ -171,6 +174,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
             if copy:
                 subarr = subarr.copy()
 
+        elif is_categorical_dtype(data):
+            return CategoricalIndex(data, copy=copy, name=name, **kwargs)
         elif hasattr(data, '__array__'):
             return Index(np.asarray(data), dtype=dtype, copy=copy, name=name,
                          **kwargs)
@@ -626,6 +631,9 @@ def is_numeric(self):
     def is_object(self):
         return self.dtype == np.object_
 
+    def is_categorical(self):
+        return self.inferred_type == 'categorical'
+
     def is_mixed(self):
         return 'mixed' in self.inferred_type
 
@@ -1045,10 +1053,12 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs):
 
         from pandas.core.format import format_array
 
-        if values.dtype == np.object_:
+        if com.is_categorical_dtype(values.dtype):
+            values = np.array(values)
+        elif com.is_object_dtype(values.dtype):
             values = lib.maybe_convert_objects(values, safe=1)
 
-        if values.dtype == np.object_:
+        if com.is_object_dtype(values.dtype):
             result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))
                       for x in values]
 
@@ -1543,7 +1553,7 @@ def get_indexer(self, target, method=None, limit=None):
         if pself is not self or ptarget is not target:
             return pself.get_indexer(ptarget, method=method, limit=limit)
 
-        if self.dtype != target.dtype:
+        if not com.is_dtypes_equal(self.dtype,target.dtype):
             this = self.astype(object)
             target = target.astype(object)
             return this.get_indexer(target, method=method, limit=limit)
@@ -2511,6 +2521,206 @@ def invalid_op(self, other=None):
 Index._add_numeric_methods_disabled()
 Index._add_logical_methods()
 
+class CategoricalIndex(Index):
+    """
+
+    Immutable Index implementing an ordered, sliceable set. CategoricalIndex
+    represents a sparsely populated Index with an underlying Categorical.
+
+    Parameters
+    ----------
+    data : array-like (1-dimensional)
+    categories : optional categories for the CategoricalIndex
+    copy : bool
+        Make a copy of input ndarray
+    name : object
+        Name to be stored in the index
+
+    """
+
+    _typ = 'categoricalindex'
+    _engine_type = _index.Int64Engine
+    _attributes = ['name','categories']
+
+    def __new__(cls, data=None, categories=None, dtype=None, copy=False, name=None, fastpath=False, ordered=None, **kwargs):
+        # coerce ``data`` to an ordered Categorical, then wrap it via _simple_new
+        def create_categorical(data=data, categories=categories):
+            if categories is not None:
+                data = data.set_categories(categories)
+            if not data.ordered:
+                data = data.as_ordered()
+            return data
+
+        if fastpath:
+            return cls._simple_new(data, name=name)
+
+        if ordered is not None:
+            raise ValueError("CategoricalIndex are by definition ordered")
+
+        if isinstance(data, com.ABCCategorical):
+            data = create_categorical(data, categories)
+        elif data is None or np.isscalar(data):
+            cls._scalar_data_error(data)
+        elif isinstance(data, CategoricalIndex):
+            data = data._data
+            data = create_categorical(data, categories)
+        else:
+            from pandas.core.categorical import Categorical
+            data = Categorical(data, categories=categories, ordered=True)
+
+        if copy:
+            data = data.copy()
+
+        return cls._simple_new(data, name=name)
+
+    @classmethod
+    def _simple_new(cls, values, name=None, categories=None, **kwargs):
+        result = object.__new__(cls)
+        # wrap non-Categorical input; otherwise optionally re-set the categories
+        if not isinstance(values, com.ABCCategorical):
+            from pandas.core.categorical import Categorical
+            values = Categorical(values, categories=categories, ordered=True)
+        elif categories is not None:
+            values = values.set_categories(categories)
+
+        result._data = values
+        result.name = name
+        for k, v in compat.iteritems(kwargs):
+            setattr(result,k,v)
+
+        result._reset_identity()
+        return result
+
+    def equals(self, other):
+        """
+        Determines if two CategoricalIndex objects contain the same elements.
+        """
+        if self.is_(other):
+            return True
+
+        if not isinstance(other, CategoricalIndex):
+            return False
+
+        try:
+            return (self._data == other._data).all()
+        except Exception:
+            return False
+
+    @property
+    def inferred_type(self):
+        return 'categorical'
+
+    @property
+    def values(self):
+        """ return the underlying data, which is a Categorical """
+        return self._data
+
+    @property
+    def codes(self):
+        return self._data.codes
+
+    @property
+    def categories(self):
+        return self._data.categories
+
+    def __contains__(self, key):
+        hash(key)  # unhashable keys raise TypeError here
+        return key in self.categories  # membership is checked against the categories
+
+    def __array__(self, result=None):
+        """ the array interface, return my values """
+        return np.array(self._data)
+
+    def _array_values(self):
+        return self.values
+
+    def argsort(self, *args, **kwargs):
+        return self.values.argsort(*args, **kwargs)
+
+    @cache_readonly
+    def _engine(self):
+
+        # we are going to look things up with the codes themselves
+        return self._engine_type(lambda: self.codes.astype('i8'), len(self))
+
+    @cache_readonly
+    def is_unique(self):
+        return not self.duplicated().any()
+
+    @Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
+    def duplicated(self, take_last=False):
+        from pandas.hashtable import duplicated_int64
+        return duplicated_int64(self.codes.astype('i8'), take_last)
+
+    def get_loc(self, key, method=None):
+        """
+        Get integer location(s) for requested label
+
+        Parameters
+        ----------
+        key : label; must be one of self.categories, else KeyError(key)
+        method : {None}
+            * default: exact matches only (the argument is not used).
+
+        Returns
+        -------
+        loc : result of the engine's non-unique lookup of the key's code
+        """
+        tcodes = self.categories.get_indexer([key])
+        if tcodes[0] == -1:
+            raise KeyError(key)
+        return self._engine.get_indexer_non_unique(tcodes)[0]
+
+    def get_indexer(self, target, method=None, limit=None):
+        """
+        Compute the indexer for a new index given the current index. The
+        indexer should be then used as an input to ndarray.take to align the
+        current data to the new index. Targets are first translated to
+        category codes, which are then looked up in the engine.
+
+        Parameters
+        ----------
+        target : Index or list-like; a CategoricalIndex contributes its categories
+        method : only None (exact match) is implemented;
+            'pad' / 'backfill' / 'nearest' raise NotImplementedError
+        limit : accepted for signature compatibility, not used here
+
+        Notes
+        -----
+        This is a low-level method and probably should be used at your own risk
+
+        Examples
+        --------
+        >>> indexer = index.get_indexer(new_index)
+        >>> new_values = cur_values.take(indexer)
+        >>> # no mask is returned, only the indexer
+
+        Returns
+        -------
+        indexer : ndarray, passed through com._ensure_platform_int
+        """
+        method = com._clean_reindex_fill_method(method)
+        target = _ensure_index(target)
+
+        if isinstance(target, CategoricalIndex):
+            target = target.categories  # NOTE(review): uses the target's categories, not its values - confirm intended
+
+        if method == 'pad' or method == 'backfill':
+            raise NotImplementedError("method='pad' and method='backfill' not implemented yet "
+                                      'for CategoricalIndex')
+        elif method == 'nearest':
+            raise NotImplementedError("method='nearest' not implemented yet "
+                                      'for CategoricalIndex')
+        else:
+
+            tcodes = self.categories.get_indexer(target)
+            indexer = self._engine.get_indexer_non_unique(tcodes)[0]
+
+        return com._ensure_platform_int(indexer)
+
+
+CategoricalIndex._add_numeric_methods_disabled()
+CategoricalIndex._add_logical_methods_disabled()
 
 class NumericIndex(Index):
     """
 
@@ -2605,8 +2605,9 @@ def _try_cast(arr, take_fast_path):
 
     # GH #846
     if isinstance(data, (np.ndarray, Index, Series)):
-        subarr = np.array(data, copy=False)
+
         if dtype is not None:
+            subarr = np.array(data, copy=False)
 
             # possibility of nan -> garbage
             if com.is_float_dtype(data.dtype) and com.is_integer_dtype(dtype):
 
@@ -2658,6 +2658,14 @@ def cmp(a,b):
             self.assertRaises(TypeError, lambda : invalid(s))
 
 
+    def test_astype_categorical(self):
+        # only 'category' and 'object' are valid astype targets for a Categorical
+        cat = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
+        tm.assert_categorical_equal(cat, cat.astype('category'))
+        tm.assert_almost_equal(np.array(cat), cat.astype('object'))
+
+        self.assertRaises(TypeError, cat.astype, float)
+
     def test_to_records(self):
 
         # GH8626