Skip to content

Commit 059ffaa

Browse files
chris-b1 authored and jreback committed
BUG: regression of read_excel with squeeze=True
closes #12184 closes #12157
1 parent 83fe3f3 commit 059ffaa

File tree

6 files changed

+31
-7
lines changed

6 files changed

+31
-7
lines changed

doc/source/whatsnew/v0.18.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ Bug Fixes
565565

566566
- Bug in ``.plot`` potentially modifying the ``colors`` input when the number of columns didn't match the number of series provided (:issue:`12039`).
567567

568-
568+
- Bug in ``read_excel`` failing to read data with one column when ``squeeze=True`` (:issue:`12157`)
569569
- Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`)
570570
- Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`)
571571
- Bug in building *pandas* with debugging symbols (:issue:`12123`)

pandas/io/excel.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
7676
index_col=None, names=None, parse_cols=None, parse_dates=False,
7777
date_parser=None, na_values=None, thousands=None,
7878
convert_float=True, has_index_names=None, converters=None,
79-
engine=None, **kwds):
79+
engine=None, squeeze=False, **kwds):
8080
"""
8181
Read an Excel table into a pandas DataFrame
8282
@@ -133,6 +133,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
133133
* If list of ints then indicates list of column numbers to be parsed
134134
* If string then indicates comma separated list of column names and
135135
column ranges (e.g. "A:E" or "A,C,E:F")
136+
squeeze : boolean, default False
137+
If the parsed data only contains one column then return a Series
136138
na_values : list-like, default None
137139
List of additional strings to recognize as NA/NaN
138140
thousands : str, default None
@@ -171,7 +173,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
171173
index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
172174
date_parser=date_parser, na_values=na_values, thousands=thousands,
173175
convert_float=convert_float, has_index_names=has_index_names,
174-
skip_footer=skip_footer, converters=converters, **kwds)
176+
skip_footer=skip_footer, converters=converters,
177+
squeeze=squeeze, **kwds)
175178

176179

177180
class ExcelFile(object):
@@ -227,7 +230,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
227230
index_col=None, parse_cols=None, parse_dates=False,
228231
date_parser=None, na_values=None, thousands=None,
229232
convert_float=True, has_index_names=None,
230-
converters=None, **kwds):
233+
converters=None, squeeze=False, **kwds):
231234
"""
232235
Parse specified sheet(s) into a DataFrame
233236
@@ -246,6 +249,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
246249
skip_footer=skip_footer,
247250
convert_float=convert_float,
248251
converters=converters,
252+
squeeze=squeeze,
249253
**kwds)
250254

251255
def _should_parse(self, i, parse_cols):
@@ -285,7 +289,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
285289
index_col=None, has_index_names=None, parse_cols=None,
286290
parse_dates=False, date_parser=None, na_values=None,
287291
thousands=None, convert_float=True,
288-
verbose=False, **kwds):
292+
verbose=False, squeeze=False, **kwds):
289293

290294
skipfooter = kwds.pop('skipfooter', None)
291295
if skipfooter is not None:
@@ -452,11 +456,13 @@ def _parse_cell(cell_contents, cell_typ):
452456
date_parser=date_parser,
453457
skiprows=skiprows,
454458
skip_footer=skip_footer,
459+
squeeze=squeeze,
455460
**kwds)
456461

457462
output[asheetname] = parser.read()
458-
output[asheetname].columns = output[
459-
asheetname].columns.set_names(header_names)
463+
if not squeeze or isinstance(output[asheetname], DataFrame):
464+
output[asheetname].columns = output[
465+
asheetname].columns.set_names(header_names)
460466

461467
if ret_dict:
462468
return output

pandas/io/tests/data/test_squeeze.xls

25.5 KB
Binary file not shown.
8.78 KB
Binary file not shown.
8.76 KB
Binary file not shown.

pandas/io/tests/test_excel.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,24 @@ def test_read_excel_skiprows_list(self):
741741
'skiprows_list', skiprows=np.array([0, 2]))
742742
tm.assert_frame_equal(actual, expected)
743743

744+
def test_read_excel_squeeze(self):
745+
# GH 12157
746+
f = os.path.join(self.dirpath, 'test_squeeze' + self.ext)
747+
748+
actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True)
749+
expected = pd.Series([2, 3, 4], [4, 5, 6], name='b')
750+
expected.index.name = 'a'
751+
tm.assert_series_equal(actual, expected)
752+
753+
actual = pd.read_excel(f, 'two_columns', squeeze=True)
754+
expected = pd.DataFrame({'a': [4, 5, 6],
755+
'b': [2, 3, 4]})
756+
tm.assert_frame_equal(actual, expected)
757+
758+
actual = pd.read_excel(f, 'one_column', squeeze=True)
759+
expected = pd.Series([1, 2, 3], name='a')
760+
tm.assert_series_equal(actual, expected)
761+
744762

745763
class XlsReaderTests(XlrdTests, tm.TestCase):
746764
ext = '.xls'

0 commit comments

Comments
 (0)