|
11 | 11 | from pandas.core.dtypes.common import needs_i8_conversion
|
12 | 12 |
|
13 | 13 | import pandas as pd
|
14 |
| -from pandas import CategoricalIndex, MultiIndex, compat |
| 14 | +from pandas import CategoricalIndex, MultiIndex, RangeIndex, compat |
15 | 15 | import pandas.util.testing as tm
|
16 | 16 |
|
17 | 17 |
|
@@ -301,3 +301,41 @@ def test_pickle(self, indices):
|
301 | 301 | unpickled = tm.round_trip_pickle(indices)
|
302 | 302 | assert indices.equals(unpickled)
|
303 | 303 | indices.name = original_name
|
| 304 | + |
@pytest.mark.parametrize('keep', ['first', 'last', False])
def test_duplicated(self, indices, keep):
    """Check ``duplicated`` against ``Series.duplicated`` for each ``keep``.

    A duplicate-free copy of ``indices`` is resampled at random positions
    so that the duplication pattern of the resulting index is known: it is
    exactly the duplication pattern of the drawn positions.
    """
    if isinstance(indices, (MultiIndex, RangeIndex)) or not len(indices):
        # MultiIndex tested separately in:
        #   tests/indexes/multi/test_unique_and_duplicates
        # NOTE(review): RangeIndex is presumably skipped because it cannot
        # be rebuilt from an arbitrary array of values -- confirm.
        pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')

    index_cls = type(indices)
    unique_idx = index_cls(indices)
    if unique_idx.has_duplicates:
        # The expected result is derived from the drawn positions alone,
        # which only works when every position maps to a distinct value,
        # so pre-existing duplicates are removed first. drop_duplicates
        # relies on duplicated itself, which looks circular, but the
        # final comparison is against Series.duplicated, which is tested
        # separately.
        unique_idx = unique_idx.drop_duplicates()

    repeats = 10
    size = len(unique_idx)
    # Draw ``repeats * size`` positions out of ``size`` candidates.
    positions = np.random.choice(size, repeats * size)
    resampled = index_cls(unique_idx.values[positions])

    expected = pd.Series(positions).duplicated(keep=keep).values
    result = resampled.duplicated(keep=keep)
    tm.assert_numpy_array_equal(result, expected)
| 331 | + |
def test_has_duplicates(self, indices):
    """Check that an index made of one repeated value reports duplicates.

    The first element of ``indices`` is repeated five times and wrapped in
    the same index class; the result must have ``is_unique`` False and
    ``has_duplicates`` True.
    """
    if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
        # MultiIndex tested separately in:
        #   tests/indexes/multi/test_unique_and_duplicates
        # RangeIndex is skipped for consistency with test_duplicated: it
        # cannot be constructed from a list of repeated values, so the
        # construction below would raise instead of testing anything.
        pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')

    holder = type(indices)
    idx = holder([indices[0]] * 5)
    assert idx.is_unique is False
    assert idx.has_duplicates is True
0 commit comments