Skip to content

Commit f6d7c49

Browse files
committed
Merge pull request #10791 from sinhrks/take_freq
BUG: Index.take may add unnecessary freq attribute
2 parents 12e6fe5 + c06dd9b commit f6d7c49

File tree

5 files changed

+33
-14
lines changed

5 files changed

+33
-14
lines changed

doc/source/whatsnew/v0.17.0.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,7 @@ Performance Improvements
551551
- 4x improvement in ``timedelta`` string parsing (:issue:`6755`, :issue:`10426`)
552552
- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`)
553553
- Significantly improved performance of indexing ``MultiIndex`` with slicers (:issue:`10287`)
554+
- 8x improvement in ``iloc`` using list-like input (:issue:`10791`)
554555
- Improved performance of ``Series.isin`` for datetimelike/integer Series (:issue:`10287`)
555556
- 20x improvement in ``concat`` of Categoricals when categories are identical (:issue:`10587`)
556557
- Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`)
@@ -627,7 +628,7 @@ Bug Fixes
627628
- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`)
628629
- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`)
629630
- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`)
630-
631+
- Bug in ``Index.take`` where an unnecessary ``freq`` attribute may be added (:issue:`10791`)
631632

632633
- Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`)
633634
- Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead of the original dtype (:issue:`9431`)

pandas/core/index.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,7 +1194,7 @@ def _ensure_compat_concat(indexes):
11941194

11951195
return indexes
11961196

1197-
def take(self, indexer, axis=0):
1197+
def take(self, indices, axis=0):
11981198
"""
11991199
return a new Index of the values selected by the indices
12001200
@@ -1203,11 +1203,9 @@ def take(self, indexer, axis=0):
12031203
numpy.ndarray.take
12041204
"""
12051205

1206-
indexer = com._ensure_platform_int(indexer)
1207-
taken = np.array(self).take(indexer)
1208-
1209-
# by definition cannot propagate freq
1210-
return self._shallow_copy(taken, freq=None)
1206+
indices = com._ensure_platform_int(indices)
1207+
taken = self.values.take(indices)
1208+
return self._shallow_copy(taken)
12111209

12121210
def putmask(self, mask, value):
12131211
"""

pandas/tests/test_index.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,11 @@ def test_take(self):
276276
expected = ind[indexer]
277277
self.assertTrue(result.equals(expected))
278278

279+
if not isinstance(ind, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
280+
# GH 10791
281+
with tm.assertRaises(AttributeError):
282+
ind.freq
283+
279284
def test_setops_errorcases(self):
280285
for name, idx in compat.iteritems(self.indices):
281286
# # non-iterable input
@@ -4775,7 +4780,7 @@ def test_repr_roundtrip(self):
47754780

47764781
mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second'])
47774782
str(mi)
4778-
4783+
47794784
if compat.PY3:
47804785
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
47814786
else:
@@ -4784,11 +4789,11 @@ def test_repr_roundtrip(self):
47844789
tm.assert_index_equal(result, mi, exact=False)
47854790
self.assertEqual(mi.get_level_values('first').inferred_type, 'string')
47864791
self.assertEqual(result.get_level_values('first').inferred_type, 'unicode')
4787-
4792+
47884793
mi_u = MultiIndex.from_product([list(u'ab'),range(3)],names=['first','second'])
47894794
result = eval(repr(mi_u))
4790-
tm.assert_index_equal(result, mi_u, exact=True)
4791-
4795+
tm.assert_index_equal(result, mi_u, exact=True)
4796+
47924797
# formatting
47934798
if compat.PY3:
47944799
str(mi)
@@ -4810,7 +4815,7 @@ def test_repr_roundtrip(self):
48104815

48114816
mi = MultiIndex.from_product([list(u'abcdefg'),range(10)],names=['first','second'])
48124817
result = eval(repr(mi_u))
4813-
tm.assert_index_equal(result, mi_u, exact=True)
4818+
tm.assert_index_equal(result, mi_u, exact=True)
48144819

48154820
def test_str(self):
48164821
# tested elsewhere

pandas/tseries/base.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,12 @@ def take(self, indices, axis=0):
182182
"""
183183
Analogous to ndarray.take
184184
"""
185-
maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices), len(self))
185+
indices = com._ensure_int64(indices)
186+
maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
186187
if isinstance(maybe_slice, slice):
187188
return self[maybe_slice]
188-
return super(DatetimeIndexOpsMixin, self).take(indices, axis)
189+
taken = self.asi8.take(indices)
190+
return self._shallow_copy(taken, freq=None)
189191

190192
def get_duplicates(self):
191193
values = Index.get_duplicates(self)

vb_suite/indexing.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,16 @@
265265

266266
multiindex_slicers = Benchmark('mdt2.loc[idx[test_A-eps_A:test_A+eps_A,test_B-eps_B:test_B+eps_B,test_C-eps_C:test_C+eps_C,test_D-eps_D:test_D+eps_D],:]', setup,
267267
start_date=datetime(2015, 1, 1))
268+
269+
#----------------------------------------------------------------------
270+
# take
271+
272+
setup = common_setup + """
273+
s = Series(np.random.rand(100000))
274+
ts = Series(np.random.rand(100000),
275+
index=date_range('2011-01-01', freq='S', periods=100000))
276+
indexer = [True, False, True, True, False] * 20000
277+
"""
278+
279+
series_take_intindex = Benchmark("s.take(indexer)", setup)
280+
series_take_dtindex = Benchmark("ts.take(indexer)", setup)

0 commit comments

Comments
 (0)