Skip to content

Commit 0ec6958

Browse files
committed
Merge branch 'index-values' into pandas-array-upstream+fu1
2 parents cb740ed + d74c5c9 commit 0ec6958

File tree

7 files changed

+85
-98
lines changed

7 files changed

+85
-98
lines changed

pandas/core/arrays/base.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -312,3 +312,15 @@ def value_counts(self, dropna=True):
312312
self = self[~self.isna()]
313313

314314
return value_counts(np.array(self))
315+
316+
@property
317+
def _ndarray_values(self):
318+
# type: () -> np.ndarray
319+
"""Internal pandas method for lossy conversion to a NumPy ndarray.
320+
321+
This method is not part of the pandas interface.
322+
323+
The expectation is that this is cheap to compute, and is primarily
324+
used for interacting with our indexers.
325+
"""
326+
return np.array(self)

pandas/core/arrays/categorical.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -410,6 +410,10 @@ def dtype(self):
410410
"""The :class:`~pandas.api.types.CategoricalDtype` for this instance"""
411411
return self._dtype
412412

413+
@property
414+
def _ndarray_values(self):
415+
return self.codes
416+
413417
@property
414418
def _constructor(self):
415419
return Categorical

pandas/core/base.py

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@
1414
is_scalar,
1515
is_datetimelike,
1616
is_categorical_dtype,
17-
is_extension_type)
17+
is_extension_type,
18+
is_extension_array_dtype)
1819

1920
from pandas.util._validators import validate_bool_kwarg
2021

@@ -772,18 +773,14 @@ def base(self):
772773
def _ndarray_values(self):
773774
"""The data as an ndarray, possibly losing information.
774775
775-
The expectation is that this is cheap to compute.
776+
The expectation is that this is cheap to compute, and is primarily
777+
used for interacting with our indexers.
776778
777779
- categorical -> codes
778-
779-
See '_values' for more.
780780
"""
781781
# type: () -> np.ndarray
782-
from pandas.core.dtypes.common import is_categorical_dtype
783-
784-
if is_categorical_dtype(self):
785-
return self._values.codes
786-
782+
if is_extension_array_dtype(self):
783+
return self.values._ndarray_values
787784
return self.values
788785

789786
@property
@@ -833,10 +830,8 @@ def tolist(self):
833830

834831
if is_datetimelike(self):
835832
return [com._maybe_box_datetimelike(x) for x in self._values]
836-
elif is_categorical_dtype(self):
837-
return self.values.tolist()
838833
else:
839-
return self._ndarray_values.tolist()
834+
return self._values.tolist()
840835

841836
def __iter__(self):
842837
"""

pandas/core/indexes/category.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -293,10 +293,6 @@ def values(self):
293293
""" return the underlying data, which is a Categorical """
294294
return self._data
295295

296-
@property
297-
def _ndarray_values(self):
298-
return self._data.codes
299-
300296
@property
301297
def itemsize(self):
302298
return self.values.itemsize

pandas/tests/indexes/datetimes/test_datetime.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -469,3 +469,18 @@ def test_factorize_dst(self):
469469
arr, res = obj.factorize()
470470
tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
471471
tm.assert_index_equal(res, idx)
472+
473+
@pytest.mark.parametrize('arr, expected', [
474+
(pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])),
475+
(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'),
476+
pd.DatetimeIndex(['2017'], tz='US/Eastern')),
477+
])
478+
def test_unique(self, arr, expected):
479+
result = arr.unique()
480+
481+
if isinstance(expected, np.ndarray):
482+
tm.assert_numpy_array_equal(result, expected)
483+
if isinstance(expected, pd.Series):
484+
tm.assert_series_equal(result, expected)
485+
if isinstance(expected, pd.DatetimeIndex):
486+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_multi.py

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -962,6 +962,53 @@ def test_values_boxed(self):
962962
# Check that code branches for boxed values produce identical results
963963
tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
964964

965+
def test_values_multiindex_datetimeindex(self):
966+
# Test to ensure we hit the boxing / nobox part of MI.values
967+
ints = np.arange(10**18, 10**18 + 5)
968+
naive = pd.DatetimeIndex(ints)
969+
aware = pd.DatetimeIndex(ints, tz='US/Central')
970+
971+
idx = pd.MultiIndex.from_arrays([naive, aware])
972+
result = idx.values
973+
974+
outer = pd.DatetimeIndex([x[0] for x in result])
975+
tm.assert_index_equal(outer, naive)
976+
977+
inner = pd.DatetimeIndex([x[1] for x in result])
978+
tm.assert_index_equal(inner, aware)
979+
980+
# n_lev > n_lab
981+
result = idx[:2].values
982+
983+
outer = pd.DatetimeIndex([x[0] for x in result])
984+
tm.assert_index_equal(outer, naive[:2])
985+
986+
inner = pd.DatetimeIndex([x[1] for x in result])
987+
tm.assert_index_equal(inner, aware[:2])
988+
989+
def test_values_multiindex_periodindex(self):
990+
# Test to ensure we hit the boxing / nobox part of MI.values
991+
ints = np.arange(2007, 2012)
992+
pidx = pd.PeriodIndex(ints, freq='D')
993+
994+
idx = pd.MultiIndex.from_arrays([ints, pidx])
995+
result = idx.values
996+
997+
outer = pd.Int64Index([x[0] for x in result])
998+
tm.assert_index_equal(outer, pd.Int64Index(ints))
999+
1000+
inner = pd.PeriodIndex([x[1] for x in result])
1001+
tm.assert_index_equal(inner, pidx)
1002+
1003+
# n_lev > n_lab
1004+
result = idx[:2].values
1005+
1006+
outer = pd.Int64Index([x[0] for x in result])
1007+
tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
1008+
1009+
inner = pd.PeriodIndex([x[1] for x in result])
1010+
tm.assert_index_equal(inner, pidx[:2])
1011+
9651012
def test_append(self):
9661013
result = self.index[:3].append(self.index[3:])
9671014
assert result.equals(self.index)

pandas/tests/test_base.py

Lines changed: 0 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -1178,39 +1178,6 @@ def test_iter_box(self):
11781178
assert res == exp
11791179

11801180

1181-
@pytest.mark.parametrize('arr, expected', [
1182-
(pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])),
1183-
(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'),
1184-
pd.DatetimeIndex(['2017'], tz='US/Eastern')),
1185-
])
1186-
def test_unique_datetime_index(arr, expected):
1187-
result = arr.unique()
1188-
1189-
if isinstance(expected, np.ndarray):
1190-
tm.assert_numpy_array_equal(result, expected)
1191-
if isinstance(expected, pd.Series):
1192-
tm.assert_series_equal(result, expected)
1193-
if isinstance(expected, pd.DatetimeIndex):
1194-
tm.assert_index_equal(result, expected)
1195-
1196-
1197-
@pytest.mark.parametrize('arr, expected', [
1198-
(pd.Series(pd.DatetimeIndex(['2017', '2017'])),
1199-
np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')),
1200-
(pd.Series(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern')),
1201-
np.array([pd.Timestamp('2017', tz="US/Eastern")], dtype=object)),
1202-
])
1203-
def test_unique_datetime_series(arr, expected):
1204-
result = arr.unique()
1205-
1206-
if isinstance(expected, np.ndarray):
1207-
tm.assert_numpy_array_equal(result, expected)
1208-
if isinstance(expected, pd.Series):
1209-
tm.assert_series_equal(result, expected)
1210-
if isinstance(expected, pd.DatetimeIndex):
1211-
tm.assert_index_equal(result, expected)
1212-
1213-
12141181
@pytest.mark.parametrize('array, expected_type, dtype', [
12151182
(np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'),
12161183
(np.array(['a', 'b']), np.ndarray, 'object'),
@@ -1260,52 +1227,3 @@ def test_ndarray_values(array, expected):
12601227
r_values = pd.Index(array)._ndarray_values
12611228
tm.assert_numpy_array_equal(l_values, r_values)
12621229
tm.assert_numpy_array_equal(l_values, expected)
1263-
1264-
1265-
def test_values_multiindex_datetimeindex():
1266-
# Test to ensure we hit the boxing / nobox part of MI.values
1267-
ints = np.arange(10**18, 10**18 + 5)
1268-
naive = pd.DatetimeIndex(ints)
1269-
aware = pd.DatetimeIndex(ints, tz='US/Central')
1270-
1271-
idx = pd.MultiIndex.from_arrays([naive, aware])
1272-
result = idx.values
1273-
1274-
outer = pd.DatetimeIndex([x[0] for x in result])
1275-
tm.assert_index_equal(outer, naive)
1276-
1277-
inner = pd.DatetimeIndex([x[1] for x in result])
1278-
tm.assert_index_equal(inner, aware)
1279-
1280-
# n_lev > n_lab
1281-
result = idx[:2].values
1282-
1283-
outer = pd.DatetimeIndex([x[0] for x in result])
1284-
tm.assert_index_equal(outer, naive[:2])
1285-
1286-
inner = pd.DatetimeIndex([x[1] for x in result])
1287-
tm.assert_index_equal(inner, aware[:2])
1288-
1289-
1290-
def test_values_multiindex_periodindex():
1291-
# Test to ensure we hit the boxing / nobox part of MI.values
1292-
ints = np.arange(2007, 2012)
1293-
pidx = pd.PeriodIndex(ints, freq='D')
1294-
1295-
idx = pd.MultiIndex.from_arrays([ints, pidx])
1296-
result = idx.values
1297-
1298-
outer = pd.Int64Index([x[0] for x in result])
1299-
tm.assert_index_equal(outer, pd.Int64Index(ints))
1300-
1301-
inner = pd.PeriodIndex([x[1] for x in result])
1302-
tm.assert_index_equal(inner, pidx)
1303-
1304-
# n_lev > n_lab
1305-
result = idx[:2].values
1306-
1307-
outer = pd.Int64Index([x[0] for x in result])
1308-
tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
1309-
1310-
inner = pd.PeriodIndex([x[1] for x in result])
1311-
tm.assert_index_equal(inner, pidx[:2])

0 commit comments

Comments
 (0)