Skip to content

Commit 5fdf1e3

Browse files
committed
BUG: MultiIndex mangling during parsing (#18062)
1 parent 27bbea7 commit 5fdf1e3

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ Bug Fixes
8989

9090
- Bug in ``pd.read_msgpack()`` when a non-existent file is passed in Python 2 (:issue:`15296`)
9191
- Bug in ``DataFrame.groupby`` where a tuple key in a ``MultiIndex`` was interpreted as a list of keys (:issue:`17979`)
92+
- Bug in ``pd.read_csv`` where a multi-index with duplicate columns was not being mangled appropriately (:issue:`18062`)
9293

9394
Conversion
9495
^^^^^^^^^^

pandas/io/parsers.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,11 @@ def _is_index_col(col):
11061106
return col is not None and col is not False
11071107

11081108

1109+
def _is_potential_multi_index(columns):
1110+
return (len(columns) and not isinstance(columns, MultiIndex) and
1111+
all([isinstance(c, tuple) for c in columns]))
1112+
1113+
11091114
def _evaluate_usecols(usecols, names):
11101115
"""
11111116
Check whether or not the 'usecols' parameter
@@ -1374,14 +1379,18 @@ def _maybe_dedup_names(self, names):
13741379
if self.mangle_dupe_cols:
13751380
names = list(names) # so we can index
13761381
counts = defaultdict(int)
1382+
is_potential_mi = _is_potential_multi_index(names)
13771383

13781384
for i, col in enumerate(names):
13791385
cur_count = counts[col]
13801386

13811387
while cur_count > 0:
13821388
counts[col] = cur_count + 1
13831389

1384-
col = '%s.%d' % (col, cur_count)
1390+
if is_potential_mi:
1391+
col = col[:-1] + ('%s.%d' % (col[-1], cur_count),)
1392+
else:
1393+
col = '%s.%d' % (col, cur_count)
13851394
cur_count = counts[col]
13861395

13871396
names[i] = col
@@ -1391,9 +1400,7 @@ def _maybe_dedup_names(self, names):
13911400

13921401
def _maybe_make_multi_index_columns(self, columns, col_names=None):
13931402
# possibly create a column mi here
1394-
if (not self.tupleize_cols and len(columns) and
1395-
not isinstance(columns, MultiIndex) and
1396-
all([isinstance(c, tuple) for c in columns])):
1403+
if _is_potential_multi_index(columns):
13971404
columns = MultiIndex.from_tuples(columns, names=col_names)
13981405
return columns
13991406

pandas/tests/io/parser/header.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,13 @@ def test_singleton_header(self):
290290
df = self.read_csv(StringIO(data), header=[0])
291291
expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]})
292292
tm.assert_frame_equal(df, expected)
293+
294+
def test_mangles_multi_index(self):
    # GH 18062: repeated header tuples are expected to be made unique
    # by suffixing the last level of the tuple ('one' -> 'one.1', ...).
    data = """A,A,A,B\none,one,one,two\n0,40, 34,0.1"""

    expected_columns = MultiIndex.from_tuples([
        ('A', 'one'),
        ('A', 'one.1'),
        ('A', 'one.2'),
        ('B', 'two'),
    ])
    expected = DataFrame([[0, 40, 34, 0.1]], columns=expected_columns)

    result = self.read_csv(StringIO(data), header=[0, 1])
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)