Skip to content

Commit 96ac6fa

Browse files
author
Roger Thomas
committed
Update
1 parent d9f9786 commit 96ac6fa

File tree

2 files changed

+43
-24
lines changed

2 files changed

+43
-24
lines changed

pandas/core/frame.py

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1932,16 +1932,15 @@ def to_dict(self, orient: str = "dict", into=dict):
19321932
for t in self.itertuples(index=False, name=None)
19331933
]
19341934
elif object_dtype_cols:
1935-
is_object_dtype_by_index = [
1936-
col in object_dtype_cols for col in self.columns
1937-
]
1938-
data = [
1939-
[
1940-
maybe_box_native(v) if is_object_dtype_by_index[i] else v
1941-
for i, v in enumerate(t)
1942-
]
1943-
for t in self.itertuples(index=False, name=None)
1935+
# Several approaches were tried here; this one proved to be the
1936+
# best in general
1937+
data = [list(t) for t in self.itertuples(index=False, name=None)]
1938+
object_type_indices = [
1939+
i for i, col in enumerate(self.columns) if col in object_dtype_cols
19441940
]
1941+
for row in data:
1942+
for i in object_type_indices:
1943+
row[i] = maybe_box_native(row[i])
19451944
else:
19461945
data = [list(t) for t in self.itertuples(index=False, name=None)]
19471946
return into_c(
@@ -1955,7 +1954,16 @@ def to_dict(self, orient: str = "dict", into=dict):
19551954
return into_c((k, v) for k, v in self.items())
19561955
elif orient == "records":
19571956
columns = self.columns.tolist()
1958-
if object_dtype_cols:
1957+
if are_all_object_dtype_cols:
1958+
rows = (
1959+
dict(zip(columns, row))
1960+
for row in self.itertuples(index=False, name=None)
1961+
)
1962+
return [
1963+
into_c((k, maybe_box_native(v)) for k, v in row.items())
1964+
for row in rows
1965+
]
1966+
elif object_dtype_cols:
19591967
is_object_dtype_by_index = [col in object_dtype_cols for col in columns]
19601968
return [
19611969
into_c(
@@ -1980,7 +1988,12 @@ def to_dict(self, orient: str = "dict", into=dict):
19801988
if not self.index.is_unique:
19811989
raise ValueError("DataFrame index must be unique for orient='index'.")
19821990
columns = self.columns.tolist()
1983-
if object_dtype_cols:
1991+
if are_all_object_dtype_cols:
1992+
return into_c(
1993+
(t[0], dict(zip(self.columns, map(maybe_box_native, t[1:]))))
1994+
for t in self.itertuples(name=None)
1995+
)
1996+
elif object_dtype_cols:
19841997
is_object_dtype_by_index = [
19851998
col in object_dtype_cols for col in self.columns
19861999
]
@@ -1998,10 +2011,7 @@ def to_dict(self, orient: str = "dict", into=dict):
19982011
)
19992012
else:
20002013
return into_c(
2001-
(
2002-
t[0],
2003-
{columns[i]: v for i, v in enumerate(t[1:])},
2004-
)
2014+
(t[0], dict(zip(self.columns, t[1:])))
20052015
for t in self.itertuples(name=None)
20062016
)
20072017
elif orient == "tight":
@@ -2011,16 +2021,15 @@ def to_dict(self, orient: str = "dict", into=dict):
20112021
for t in self.itertuples(index=False, name=None)
20122022
]
20132023
elif object_dtype_cols:
2014-
is_object_dtype_by_index = [
2015-
col in object_dtype_cols for col in self.columns
2016-
]
2017-
data = [
2018-
[
2019-
maybe_box_native(v) if is_object_dtype_by_index[i] else v
2020-
for i, v in enumerate(t)
2021-
]
2022-
for t in self.itertuples(index=False, name=None)
2024+
# Several approaches were tried here; this one proved to be the
2025+
# best in general
2026+
data = [list(t) for t in self.itertuples(index=False, name=None)]
2027+
object_type_indices = [
2028+
i for i, col in enumerate(self.columns) if col in object_dtype_cols
20232029
]
2030+
for row in data:
2031+
for i in object_type_indices:
2032+
row[i] = maybe_box_native(row[i])
20242033
else:
20252034
data = [list(t) for t in self.itertuples(index=False, name=None)]
20262035
return into_c(

pandas/tests/frame/methods/test_to_dict.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,16 @@ def test_to_dict_orient_tight(self, index, columns):
380380
"b": [float, float, float],
381381
},
382382
),
383+
( # Ensure one case uses a DataFrame whose columns are all object dtype
384+
{
385+
"a": [1, "hello", 3],
386+
"b": [1.1, "world", 3.3],
387+
},
388+
{
389+
"a": [int, str, int],
390+
"b": [float, str, float],
391+
},
392+
),
383393
),
384394
)
385395
def test_to_dict_returns_native_types(self, orient, data, expected_types):

0 commit comments

Comments
 (0)