Skip to content

Commit 11d9ac1

Browse files
committed
move new constructor to dtypes/dtypes.py
1 parent 0aa56f8 commit 11d9ac1

File tree

5 files changed

+110
-100
lines changed

5 files changed

+110
-100
lines changed

pandas/core/arrays/categorical.py

Lines changed: 4 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -200,71 +200,6 @@ def contains(cat, key, container):
200200
return any(loc_ in container for loc_ in loc)
201201

202202

203-
def create_categorical_dtype(values=None, categories=None, ordered=None,
                             dtype=None):
    """
    Build a :class:`~pandas.api.types.CategoricalDtype` from the given inputs.

    This helper only resolves which dtype to use; it deliberately performs
    no factorization, so the result may still have ``categories=None``.

    Parameters
    ----------
    values : list-like, optional
        The list-like must be 1-dimensional. Only consulted (for its own
        dtype) when `dtype` is None and `values` is categorical.
    categories : list-like, optional
        Categories for the CategoricalDtype.
    ordered : bool, optional
        Designating if the categories are ordered.
    dtype : CategoricalDtype or the string "category", optional
        If a non-string object is passed, it cannot be combined with
        `categories` or `ordered`.

    Returns
    -------
    CategoricalDtype

    Raises
    ------
    ValueError
        If `dtype` is a string other than "category", or if a non-string
        `dtype` is given together with `categories` or `ordered`.

    Examples
    --------
    >>> create_categorical_dtype()
    CategoricalDtype(categories=None, ordered=None)
    >>> create_categorical_dtype(categories=['a', 'b'], ordered=True)
    CategoricalDtype(categories=['a', 'b'], ordered=True)
    >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
    >>> create_categorical_dtype(c, ['x', 'y'], ordered=True, dtype=dtype2)
    ValueError: Cannot specify `categories` or `ordered` together with `dtype`.

    An explicitly supplied dtype wins over the dtype carried by values:

    >>> create_categorical_dtype(c, dtype=dtype2)
    CategoricalDtype(['x', 'y'], ordered=False)
    """
    if dtype is None:
        if is_categorical(values):
            # No explicit dtype: start from the one attached to ``values``
            # and let ``categories`` / ``ordered`` override it.
            return values.dtype._from_categorical_dtype(values.dtype,
                                                        categories, ordered)
        # Nothing to inherit from; this may legitimately end up with
        # categories=None and ordered=None.
        return CategoricalDtype(categories, ordered)

    # From here on an explicit ``dtype`` was given and takes precedence
    # over whatever ``values.dtype`` might be.
    if not isinstance(dtype, compat.string_types):
        if not (categories is None and ordered is None):
            raise ValueError("Cannot specify `categories` or `ordered` "
                             "together with `dtype`.")
        return dtype

    if dtype != 'category':
        msg = "Unknown dtype {dtype!r}"
        raise ValueError(msg.format(dtype=dtype))
    return CategoricalDtype(categories, ordered)
266-
267-
268203
_codes_doc = """\
269204
The category codes of this categorical.
270205
@@ -381,7 +316,8 @@ class Categorical(ExtensionArray, PandasObject):
381316
def __init__(self, values, categories=None, ordered=None, dtype=None,
382317
fastpath=False):
383318

384-
dtype = create_categorical_dtype(values, categories, ordered, dtype)
319+
dtype = CategoricalDtype._from_values_or_dtype(values, categories,
320+
ordered, dtype)
385321
# At this point, dtype is always a CategoricalDtype, but
386322
# we may have dtype.categories be None, and we need to
387323
# infer categories in a factorization step further below
@@ -689,7 +625,8 @@ def from_codes(cls, codes, categories, ordered=False):
689625
categorical. If not given, the resulting categorical will be
690626
unordered.
691627
"""
692-
dtype = create_categorical_dtype(codes, categories, ordered)
628+
dtype = CategoricalDtype._from_values_or_dtype(codes, categories,
629+
ordered)
693630

694631
codes = np.asarray(codes) # #21767
695632
if not is_integer_dtype(codes):

pandas/core/dtypes/dtypes.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,77 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None):
241241
ordered = dtype.ordered
242242
return cls(categories, ordered)
243243

244+
@classmethod
def _from_values_or_dtype(cls, values=None, categories=None, ordered=None,
                          dtype=None):
    """
    Construct from the inputs used in :class:`Categorical` construction.

    This is an internal helper method, and specifically does not do the
    factorization step, if that is needed. Additional steps may
    therefore have to be taken to create the final dtype.

    Parameters
    ----------
    values : list-like, optional
        The list-like must be 1-dimensional.
    categories : list-like, optional
        Categories for the CategoricalDtype.
    ordered : bool, optional
        Designating if the categories are ordered.
    dtype : CategoricalDtype or the string "category", optional
        If a ``CategoricalDtype``, it cannot be used together with
        `categories` or `ordered`.

    Returns
    -------
    CategoricalDtype

    Raises
    ------
    ValueError
        If `dtype` is a string other than "category", if a non-string
        `dtype` is combined with `categories` or `ordered`, or if `dtype`
        is neither a string nor a ``CategoricalDtype``.

    Examples
    --------
    >>> CategoricalDtype._from_values_or_dtype()
    CategoricalDtype(categories=None, ordered=None)
    >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'],
    ...                                        ordered=True)
    CategoricalDtype(categories=['a', 'b'], ordered=True)
    >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
    >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True,
    ...                                        dtype=dtype2)
    Traceback (most recent call last):
        ...
    ValueError: Cannot specify `categories` or `ordered` together with
    `dtype`.

    The supplied dtype takes precedence over the dtype of values:

    >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
    CategoricalDtype(categories=['x', 'y'], ordered=False)
    """
    # NOTE(review): imported at call time rather than at module level,
    # presumably to avoid a circular import -- confirm before hoisting.
    from pandas.core.dtypes.common import is_categorical

    if dtype is not None:
        # The dtype argument takes precedence over values.dtype (if any)
        if isinstance(dtype, compat.string_types):
            if dtype != 'category':
                msg = "Unknown dtype {dtype!r}"
                raise ValueError(msg.format(dtype=dtype))
            dtype = CategoricalDtype(categories, ordered)
        elif categories is not None or ordered is not None:
            raise ValueError("Cannot specify `categories` or `ordered` "
                             "together with `dtype`.")
        elif not isinstance(dtype, CategoricalDtype):
            # Callers rely on always getting a CategoricalDtype back;
            # fail here instead of handing an unrelated dtype object on.
            msg = "Cannot construct CategoricalDtype from {dtype!r}"
            raise ValueError(msg.format(dtype=dtype))
    elif is_categorical(values):
        # If no "dtype" was passed, use the one from "values", but honor
        # the "ordered" and "categories" arguments
        dtype = values.dtype._from_categorical_dtype(values.dtype,
                                                     categories, ordered)
    else:
        # If dtype=None and values is not categorical, create a new dtype.
        # Note: This could potentially have categories=None and
        # ordered=None.
        dtype = CategoricalDtype(categories, ordered)

    return dtype
314+
244315
def _finalize(self, categories, ordered, fastpath=False):
245316

246317
if ordered is not None:

pandas/core/indexes/category.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717

1818
from pandas.core import accessor
1919
from pandas.core.algorithms import take_1d
20-
from pandas.core.arrays.categorical import (
21-
Categorical, contains, create_categorical_dtype)
20+
from pandas.core.arrays.categorical import Categorical, contains
2221
import pandas.core.common as com
2322
from pandas.core.config import get_option
2423
import pandas.core.indexes.base as ibase
@@ -108,7 +107,8 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
108107
if fastpath:
109108
return cls._simple_new(data, name=name, dtype=dtype)
110109

111-
dtype = create_categorical_dtype(data, categories, ordered, dtype)
110+
dtype = CategoricalDtype._from_values_or_dtype(data, categories,
111+
ordered, dtype)
112112

113113
if name is None and hasattr(data, 'name'):
114114
name = data.name

pandas/tests/arrays/categorical/test_constructors.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
Categorical, CategoricalIndex, DatetimeIndex, Index, Interval,
1414
IntervalIndex, NaT, Series, Timestamp, date_range, period_range,
1515
timedelta_range)
16-
from pandas.core.arrays.categorical import create_categorical_dtype
1716
import pandas.util.testing as tm
1817

1918

@@ -531,32 +530,3 @@ def test_constructor_imaginary(self):
531530
c1 = Categorical(values)
532531
tm.assert_index_equal(c1.categories, Index(values))
533532
tm.assert_numpy_array_equal(np.array(c1), np.array(values))
534-
535-
536-
class TestCreateCategoricalDtype(object):
    """Checks for the ``create_categorical_dtype`` helper."""

    # Shared fixtures: two distinct dtypes and a Categorical carrying dtype1.
    dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    c = Categorical([0, 1], dtype=dtype1, fastpath=True)

    @pytest.mark.parametrize(
        'values, categories, ordered, dtype, expected',
        [
            (None, None, None, None, CategoricalDtype()),
            (None, ['a', 'b'], True, None, dtype1),
            (c, None, None, dtype2, dtype2),
            (c, ['x', 'y'], False, None, dtype2),
        ])
    def test_create_categorical_dtype(
            self, values, categories, ordered, dtype, expected):
        """Valid argument combinations resolve to the expected dtype."""
        result = create_categorical_dtype(values=values,
                                          categories=categories,
                                          ordered=ordered,
                                          dtype=dtype)
        assert result == expected

    @pytest.mark.parametrize(
        'values, categories, ordered, dtype',
        [
            (None, ['a', 'b'], True, dtype2),
            (None, ['a', 'b'], None, dtype2),
            (None, None, True, dtype2),
        ])
    def test_create_categorical_dtype_raises(self, values, categories,
                                             ordered, dtype):
        """A dtype object combined with categories/ordered is rejected."""
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."

        with pytest.raises(ValueError, match=msg):
            create_categorical_dtype(values=values, categories=categories,
                                     ordered=ordered, dtype=dtype)

pandas/tests/dtypes/test_dtypes.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,38 @@ def test_constructor_invalid(self):
9898
with pytest.raises(TypeError, match=msg):
9999
CategoricalDtype("category")
100100

101+
dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
102+
dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
103+
c = Categorical([0, 1], dtype=dtype1, fastpath=True)
104+
105+
@pytest.mark.parametrize(
    'values, categories, ordered, dtype, expected',
    [
        (None, None, None, None, CategoricalDtype()),
        (None, ['a', 'b'], True, None, dtype1),
        (c, None, None, dtype2, dtype2),
        (c, ['x', 'y'], False, None, dtype2),
    ])
def test_create_categorical_dtype(
        self, values, categories, ordered, dtype, expected):
    """Valid argument combinations resolve to the expected dtype."""
    result = CategoricalDtype._from_values_or_dtype(
        values=values, categories=categories, ordered=ordered, dtype=dtype)
    assert result == expected
118+
119+
@pytest.mark.parametrize(
    'values, categories, ordered, dtype',
    [
        (None, ['a', 'b'], True, dtype2),
        (None, ['a', 'b'], None, dtype2),
        (None, None, True, dtype2),
    ])
def test_create_categorical_dtype_raises(self, values, categories,
                                         ordered, dtype):
    """A dtype object combined with categories/ordered is rejected."""
    msg = "Cannot specify `categories` or `ordered` together with `dtype`."

    with pytest.raises(ValueError, match=msg):
        CategoricalDtype._from_values_or_dtype(
            values=values, categories=categories, ordered=ordered,
            dtype=dtype)
132+
101133
def test_is_dtype(self):
102134
assert CategoricalDtype.is_dtype(self.dtype)
103135
assert CategoricalDtype.is_dtype('category')

0 commit comments

Comments
 (0)