Skip to content

Commit 62e5651

Browse files
committed
PERF: Perf issue in concatting with empty objects (GH3259)
1 parent 2983b69 commit 62e5651

File tree

5 files changed

+50
-12
lines changed

5 files changed

+50
-12
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ Bug Fixes
166166
- Bug in ``Series.reindex`` when specifying a ``method`` with some nan values was inconsistent (noted on a resample) (:issue:`6418`)
167167
- Bug in :meth:`DataFrame.replace` where nested dicts were erroneously
168168
depending on the order of dictionary keys and values (:issue:`5338`).
169+
- Performance issue when concatenating with empty objects (:issue:`3259`)
169170

170171
pandas 0.13.1
171172
-------------

pandas/core/groupby.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2209,10 +2209,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
22092209

22102210
# make Nones an empty object
22112211
if com._count_not_none(*values) != len(values):
2212-
v = None
2213-
for v in values:
2214-
if v is not None:
2215-
break
2212+
v = next(v for v in values if v is not None)
22162213
if v is None:
22172214
return DataFrame()
22182215
elif isinstance(v, NDFrame):

pandas/tools/merge.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
957957
objs = [objs[k] for k in keys]
958958

959959
if keys is None:
960-
objs = [obj for obj in objs if obj is not None]
960+
objs = [obj for obj in objs if obj is not None ]
961961
else:
962962
# #1649
963963
clean_keys = []
@@ -970,16 +970,25 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
970970
objs = clean_objs
971971
keys = clean_keys
972972

973-
if len(objs) == 0:
974-
raise Exception('All objects passed were None')
975-
976973
# consolidate data
974+
self.objs = []
977975
for obj in objs:
978-
if isinstance(obj, NDFrame):
979-
obj.consolidate(inplace=True)
980-
self.objs = objs
976+
if not isinstance(obj, NDFrame):
977+
raise TypeError("cannot concatenate a non-NDFrame object")
978+
979+
# skip completely empty
980+
if not np.sum(obj.shape):
981+
continue
982+
983+
# consolidate
984+
obj.consolidate(inplace=True)
985+
self.objs.append(obj)
986+
987+
if len(self.objs) == 0:
988+
raise Exception('All objects passed were None')
981989

982-
sample = objs[0]
990+
# need the first non-empty object as a sample
991+
sample = next(obj for obj in self.objs if np.prod(obj.shape))
983992

984993
# Need to flip BlockManager axis in the DataFrame special case
985994
if isinstance(sample, DataFrame):

pandas/tools/tests/test_merge.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,6 +1653,15 @@ def test_handle_empty_objects(self):
16531653

16541654
tm.assert_frame_equal(concatted, expected)
16551655

1656+
# empty as first element with time series
1657+
# GH3259
1658+
df = DataFrame(dict(A = range(10000)),index=date_range('20130101',periods=10000,freq='s'))
1659+
empty = DataFrame()
1660+
result = concat([df,empty])
1661+
assert_frame_equal(result, df)
1662+
result = concat([empty,df])
1663+
assert_frame_equal(result, df)
1664+
16561665
def test_panel_join(self):
16571666
panel = tm.makePanel()
16581667
tm.add_nans(panel)
@@ -1967,6 +1976,13 @@ def test_concat_series_axis1_same_names_ignore_index(self):
19671976
result = concat([s1, s2], axis=1, ignore_index=True)
19681977
self.assertTrue(np.array_equal(result.columns, [0, 1]))
19691978

1979+
def test_concat_invalid(self):
    """Concatenating an NDFrame with a non-NDFrame object raises TypeError."""
    # trying to concat a ndframe with a non-ndframe
    df1 = mkdf(10, 2)
    for obj in [1, dict(), [1, 2], (1, 2)]:
        # Hand concat and its argument straight to assertRaises.  Wrapping
        # the call in a one-argument lambda would raise TypeError from the
        # lambda invocation itself (assertRaises calls it with no
        # arguments), so concat would never actually be exercised.
        self.assertRaises(TypeError, concat, [df1, obj])
1985+
19701986
def test_concat_invalid_first_argument(self):
19711987
df1 = mkdf(10, 2)
19721988
df2 = mkdf(10, 2)

vb_suite/join_merge.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,21 @@ def sample(values, k):
186186
concat_small_frames = Benchmark('concat([df] * 1000)', setup,
187187
start_date=datetime(2012, 1, 1))
188188

189+
190+
#----------------------------------------------------------------------
191+
# Concat empty
192+
193+
# Setup shared by both "concat with an empty frame" benchmarks: one
# 10000-row frame indexed by a second-frequency date range, plus a
# completely empty frame.
setup = common_setup + """
df = DataFrame(dict(A = range(10000)),index=date_range('20130101',periods=10000,freq='s'))
empty = DataFrame()
"""

# The benchmark statement is executed as Python source, so the list literal
# must be closed; the empty frame is timed in both the trailing and the
# leading position.
concat_empty_frames1 = Benchmark('concat([df,empty])', setup,
                                 start_date=datetime(2012, 1, 1))
concat_empty_frames2 = Benchmark('concat([empty,df])', setup,
                                 start_date=datetime(2012, 1, 1))
202+
203+
189204
#----------------------------------------------------------------------
190205
# Ordered merge
191206

0 commit comments

Comments
 (0)