Skip to content

Commit 19ca934

Browse files
jameswinegartm9k1
authored and committed
BUG: DataFrame.to_dict when orient=index data loss (pandas-dev#22810)
1 parent f64b5b3 commit 19ca934

File tree

3 files changed

+26
-0
lines changed

3 files changed

+26
-0
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,22 @@ is the case with :attr:`Period.end_time`, for example
374374

375375
p.end_time
376376

377+
.. _whatsnew_0240.api_breaking.frame_to_dict_index_orient:
378+
379+
Raise ValueError in ``DataFrame.to_dict(orient='index')``
380+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
381+
382+
:func:`DataFrame.to_dict` now raises a ``ValueError`` when used with
383+
``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`)
384+
385+
.. ipython:: python
386+
:okexcept:
387+
388+
df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A'])
389+
df
390+
391+
df.to_dict(orient='index')
392+
377393
.. _whatsnew_0240.api.datetimelike.normalize:
378394

379395
Tick DateOffset Normalize Restrictions

pandas/core/frame.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,6 +1224,10 @@ def to_dict(self, orient='dict', into=dict):
12241224
for k, v in zip(self.columns, np.atleast_1d(row)))
12251225
for row in self.values]
12261226
elif orient.lower().startswith('i'):
1227+
if not self.index.is_unique:
1228+
raise ValueError(
1229+
"DataFrame index must be unique for orient='index'."
1230+
)
12271231
return into_c((t[0], dict(zip(self.columns, t[1:])))
12281232
for t in self.itertuples())
12291233
else:

pandas/tests/frame/test_convert_to.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@ def test_to_dict_timestamp(self):
7171
tm.assert_dict_equal(test_data_mixed.to_dict(orient='split'),
7272
expected_split_mixed)
7373

74+
def test_to_dict_index_not_unique_with_index_orient(self):
75+
# GH22801
76+
# Data loss when indexes are not unique. Raise ValueError.
77+
df = DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A'])
78+
pytest.raises(ValueError, df.to_dict, orient='index')
79+
7480
def test_to_dict_invalid_orient(self):
7581
df = DataFrame({'A': [0, 1]})
7682
pytest.raises(ValueError, df.to_dict, orient='xinvalid')

0 commit comments

Comments
 (0)