Merge pull request #10791 from sinhrks/take_freq

sinhrks · sinhrks · commit f6d7c491f889 · 2015-08-12T22:51:50.000+09:00
BUG: Index.take may add unnecessary freq attribute
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -551,6 +551,7 @@ Performance Improvements
 - 4x improvement in ``timedelta`` string parsing (:issue:`6755`, :issue:`10426`)
 - 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`)
 - Significantly improved performance of indexing ``MultiIndex`` with slicers (:issue:`10287`)
+- 8x improvement in ``iloc`` using list-like input (:issue:`10791`)
 - Improved performance of ``Series.isin`` for datetimelike/integer Series (:issue:`10287`)
 - 20x improvement in ``concat`` of Categoricals when categories are identical (:issue:`10587`)
 - Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`)
@@ -627,7 +628,7 @@ Bug Fixes
 - Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`)
 - Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`)
 - Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`)
-
+- Bug in ``Index.take`` that may add an unnecessary ``freq`` attribute (:issue:`10791`)
 
 - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`)
 - Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead the original dtype (:issue: `9431`)
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -1194,7 +1194,7 @@ def _ensure_compat_concat(indexes):
 
         return indexes
 
-    def take(self, indexer, axis=0):
+    def take(self, indices, axis=0):
         """
         return a new Index of the values selected by the indexer
 
@@ -1203,11 +1203,9 @@ def take(self, indexer, axis=0):
         numpy.ndarray.take
         """
 
-        indexer = com._ensure_platform_int(indexer)
-        taken = np.array(self).take(indexer)
-
-        # by definition cannot propogate freq
-        return self._shallow_copy(taken, freq=None)
+        indices = com._ensure_platform_int(indices)
+        taken = self.values.take(indices)
+        return self._shallow_copy(taken)
 
     def putmask(self, mask, value):
         """
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -276,6 +276,11 @@ def test_take(self):
             expected = ind[indexer]
             self.assertTrue(result.equals(expected))
 
+            if not isinstance(ind, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
+                # GH 10791
+                with tm.assertRaises(AttributeError):
+                    ind.freq
+
     def test_setops_errorcases(self):
         for name, idx in compat.iteritems(self.indices):
             # # non-iterable input
@@ -4775,7 +4780,7 @@ def test_repr_roundtrip(self):
 
         mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second'])
         str(mi)
-        
+
         if compat.PY3:
             tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
         else:
@@ -4784,11 +4789,11 @@ def test_repr_roundtrip(self):
             tm.assert_index_equal(result, mi, exact=False)
             self.assertEqual(mi.get_level_values('first').inferred_type, 'string')
             self.assertEqual(result.get_level_values('first').inferred_type, 'unicode')
-            
+
         mi_u = MultiIndex.from_product([list(u'ab'),range(3)],names=['first','second'])
         result = eval(repr(mi_u))
-        tm.assert_index_equal(result, mi_u, exact=True)            
-            
+        tm.assert_index_equal(result, mi_u, exact=True)
+
         # formatting
         if compat.PY3:
             str(mi)
@@ -4810,7 +4815,7 @@ def test_repr_roundtrip(self):
 
         mi = MultiIndex.from_product([list(u'abcdefg'),range(10)],names=['first','second'])
         result = eval(repr(mi_u))
-        tm.assert_index_equal(result, mi_u, exact=True)     
+        tm.assert_index_equal(result, mi_u, exact=True)
 
     def test_str(self):
         # tested elsewhere
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
@@ -182,10 +182,12 @@ def take(self, indices, axis=0):
         """
         Analogous to ndarray.take
         """
-        maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices), len(self))
+        indices = com._ensure_int64(indices)
+        maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
         if isinstance(maybe_slice, slice):
             return self[maybe_slice]
-        return super(DatetimeIndexOpsMixin, self).take(indices, axis)
+        taken = self.asi8.take(indices)
+        return self._shallow_copy(taken, freq=None)
 
     def get_duplicates(self):
         values = Index.get_duplicates(self)
diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py
@@ -265,3 +265,16 @@
 
 multiindex_slicers = Benchmark('mdt2.loc[idx[test_A-eps_A:test_A+eps_A,test_B-eps_B:test_B+eps_B,test_C-eps_C:test_C+eps_C,test_D-eps_D:test_D+eps_D],:]', setup,
                                start_date=datetime(2015, 1, 1))
+
+#----------------------------------------------------------------------
+# take
+
+setup = common_setup + """
+s = Series(np.random.rand(100000))
+ts = Series(np.random.rand(100000),
+            index=date_range('2011-01-01', freq='S', periods=100000))
+indexer = [True, False, True, True, False] * 20000
+"""
+
+series_take_intindex = Benchmark("s.take(indexer)", setup)
+series_take_dtindex = Benchmark("ts.take(indexer)", setup)