Skip to content

Commit 8b75fda

Browse files
authored
API: stop special-casing dt64/td64 in .unique (#49176)
* API: avoid special-casing dt64/td64 in .unique
* whatsnew
* GH ref
* fix whatsnew
* doctest fix
* update docstring
1 parent d2e9723 commit 8b75fda

File tree

6 files changed

+25
-19
lines changed

6 files changed

+25
-19
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ Other API changes
128128
- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`)
129129
- The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`)
130130
- When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
131+
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
131132
-
132133

133134
.. ---------------------------------------------------------------------------

pandas/core/base.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
import pandas._libs.lib as lib
2525
from pandas._typing import (
26-
ArrayLike,
2726
Axis,
2827
AxisInt,
2928
DtypeObj,
@@ -998,18 +997,11 @@ def value_counts(
998997

999998
def unique(self):
1000999
values = self._values
1001-
10021000
if not isinstance(values, np.ndarray):
1003-
result: ArrayLike = values.unique()
1004-
if (
1005-
isinstance(self.dtype, np.dtype) and self.dtype.kind in ["m", "M"]
1006-
) and isinstance(self, ABCSeries):
1007-
# GH#31182 Series._values returns EA
1008-
# unpack numpy datetime for backward-compat
1009-
result = np.asarray(result)
1001+
# i.e. ExtensionArray
1002+
result = values.unique()
10101003
else:
10111004
result = unique1d(values)
1012-
10131005
return result
10141006

10151007
def nunique(self, dropna: bool = True) -> int:

pandas/core/series.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2169,6 +2169,8 @@ def unique(self) -> ArrayLike:
21692169
* Categorical
21702170
* Period
21712171
* Datetime with Timezone
2172+
* Datetime without Timezone
2173+
* Timedelta
21722174
* Interval
21732175
* Sparse
21742176
* IntegerNA
@@ -2181,7 +2183,9 @@ def unique(self) -> ArrayLike:
21812183
array([2, 1, 3])
21822184
21832185
>>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
2184-
array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
2186+
<DatetimeArray>
2187+
['2016-01-01 00:00:00']
2188+
Length: 1, dtype: datetime64[ns]
21852189
21862190
>>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
21872191
... for _ in range(3)]).unique()

pandas/tests/base/test_value_counts.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -245,14 +245,16 @@ def test_value_counts_datetime64(index_or_series):
245245
expected_s = Series([3, 2, 1], index=idx)
246246
tm.assert_series_equal(s.value_counts(), expected_s)
247247

248-
expected = np.array(
249-
["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
250-
dtype="datetime64[ns]",
248+
expected = pd.array(
249+
np.array(
250+
["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
251+
dtype="datetime64[ns]",
252+
)
251253
)
252254
if isinstance(s, Index):
253255
tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
254256
else:
255-
tm.assert_numpy_array_equal(s.unique(), expected)
257+
tm.assert_extension_array_equal(s.unique(), expected)
256258

257259
assert s.nunique() == 3
258260

@@ -277,7 +279,7 @@ def test_value_counts_datetime64(index_or_series):
277279
exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
278280
tm.assert_index_equal(unique, exp_idx)
279281
else:
280-
tm.assert_numpy_array_equal(unique[:3], expected)
282+
tm.assert_extension_array_equal(unique[:3], expected)
281283
assert pd.isna(unique[3])
282284

283285
assert s.nunique() == 3
@@ -295,7 +297,7 @@ def test_value_counts_datetime64(index_or_series):
295297
if isinstance(td, Index):
296298
tm.assert_index_equal(td.unique(), expected)
297299
else:
298-
tm.assert_numpy_array_equal(td.unique(), expected.values)
300+
tm.assert_extension_array_equal(td.unique(), expected._values)
299301

300302
td2 = timedelta(1) + (df.dt - df.dt)
301303
td2 = klass(td2, name="dt")

pandas/tests/reshape/merge/test_merge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ def test_merge_nosort(self):
621621
}
622622
df = DataFrame.from_dict(d)
623623
var3 = df.var3.unique()
624-
var3.sort()
624+
var3 = np.sort(var3)
625625
new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)})
626626

627627
result = df.merge(new, on="var3", sort=False)

pandas/tests/test_algos.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@
4444
)
4545
import pandas._testing as tm
4646
import pandas.core.algorithms as algos
47-
from pandas.core.arrays import DatetimeArray
47+
from pandas.core.arrays import (
48+
DatetimeArray,
49+
TimedeltaArray,
50+
)
4851
import pandas.core.common as com
4952

5053

@@ -573,6 +576,10 @@ def test_dtype_preservation(self, any_numpy_dtype):
573576
if any_numpy_dtype in tm.STRING_DTYPES:
574577
expected = expected.astype(object)
575578

579+
if expected.dtype.kind in ["m", "M"]:
580+
# We get TimedeltaArray/DatetimeArray
581+
assert isinstance(result, (DatetimeArray, TimedeltaArray))
582+
result = np.array(result)
576583
tm.assert_numpy_array_equal(result, expected)
577584

578585
def test_datetime64_dtype_array_returned(self):

0 commit comments

Comments
 (0)