pandas-dev · rhshadrach · Nov 22, 2022 · Apr 22, 2022 · Apr 22, 2022 · Apr 22, 2022
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -453,6 +453,7 @@ Performance improvements
 - Performance improvement when setting values in a pyarrow backed string array (:issue:`46400`)
 - Performance improvement in :func:`factorize` (:issue:`46109`)
 - Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
+- Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict`, especially when using non-mixed dtypes (:issue:`46470`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.bug_fixes:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1909,41 +1909,73 @@ def to_dict(self, orient: str = "dict", into=dict):
             elif orient.startswith("i"):
                 orient = "index"
 
+        object_dtype_cols = {
+            col for col, dtype in self.dtypes.items() if is_object_dtype(dtype)
+        }
+        are_all_object_dtype_cols = len(object_dtype_cols) == len(self.dtypes)
         if orient == "dict":
             return into_c((k, v.to_dict(into)) for k, v in self.items())
 
         elif orient == "list":
             return into_c(
-                (k, list(map(maybe_box_native, v.tolist()))) for k, v in self.items()
+                (
+                    k,
+                    list(map(maybe_box_native, v.tolist()))
+                    if k in object_dtype_cols
+                    else v.tolist(),
+                )
+                for k, v in self.items()
             )
 
         elif orient == "split":
+            if are_all_object_dtype_cols:
+                data = [
+                    list(map(maybe_box_native, t))
+                    for t in self.itertuples(index=False, name=None)
+                ]
+            elif object_dtype_cols:
+                # Several approaches were tried here; boxing only the object-dtype
+                # positions of each row in place proved to be the fastest in general
+                data = [list(t) for t in self.itertuples(index=False, name=None)]
+                object_type_indices = [
+                    i for i, col in enumerate(self.columns) if col in object_dtype_cols
+                ]
+                for row in data:
+                    for i in object_type_indices:
+                        row[i] = maybe_box_native(row[i])
+            else:
+                data = [list(t) for t in self.itertuples(index=False, name=None)]
             return into_c(
                 (
                     ("index", self.index.tolist()),
                     ("columns", self.columns.tolist()),
-                    (
-                        "data",
-                        [
-                            list(map(maybe_box_native, t))
-                            for t in self.itertuples(index=False, name=None)
-                        ],
-                    ),
+                    ("data", data),
                 )
             )
 
         elif orient == "tight":
+            if are_all_object_dtype_cols:
+                data = [
+                    list(map(maybe_box_native, t))
+                    for t in self.itertuples(index=False, name=None)
+                ]
+            elif object_dtype_cols:
+                # Several approaches were tried here; boxing only the object-dtype
+                # positions of each row in place proved to be the fastest in general
+                data = [list(t) for t in self.itertuples(index=False, name=None)]
+                object_type_indices = [
+                    i for i, col in enumerate(self.columns) if col in object_dtype_cols
+                ]
+                for row in data:
+                    for i in object_type_indices:
+                        row[i] = maybe_box_native(row[i])
+            else:
+                data = [list(t) for t in self.itertuples(index=False, name=None)]
             return into_c(
                 (
                     ("index", self.index.tolist()),
                     ("columns", self.columns.tolist()),
-                    (
-                        "data",
-                        [
-                            list(map(maybe_box_native, t))
-                            for t in self.itertuples(index=False, name=None)
-                        ],
-                    ),
+                    ("data", data),
                     ("index_names", list(self.index.names)),
                     ("column_names", list(self.columns.names)),
                 )
@@ -1954,21 +1986,67 @@ def to_dict(self, orient: str = "dict", into=dict):
 
         elif orient == "records":
             columns = self.columns.tolist()
-            rows = (
-                dict(zip(columns, row))
-                for row in self.itertuples(index=False, name=None)
-            )
-            return [
-                into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
-            ]
+            if are_all_object_dtype_cols:
+                rows = (
+                    dict(zip(columns, row))
+                    for row in self.itertuples(index=False, name=None)
+                )
+                return [
+                    into_c((k, maybe_box_native(v)) for k, v in row.items())
+                    for row in rows
+                ]
+            elif object_dtype_cols:
+                is_object_dtype_by_index = [col in object_dtype_cols for col in columns]
+                return [
+                    into_c(
+                        zip(
+                            columns,
+                            [
+                                maybe_box_native(v)
+                                if is_object_dtype_by_index[i]
+                                else v
+                                for i, v in enumerate(t)
+                            ],
+                        )
+                    )
+                    for t in self.itertuples(index=False, name=None)
+                ]
+            else:
+                return [
+                    into_c(zip(columns, t))
+                    for t in self.itertuples(index=False, name=None)
+                ]
 
         elif orient == "index":
             if not self.index.is_unique:
                 raise ValueError("DataFrame index must be unique for orient='index'.")
-            return into_c(
-                (t[0], dict(zip(self.columns, map(maybe_box_native, t[1:]))))
-                for t in self.itertuples(name=None)
-            )
+            columns = self.columns.tolist()
+            if are_all_object_dtype_cols:
+                return into_c(
+                    (t[0], dict(zip(self.columns, map(maybe_box_native, t[1:]))))
+                    for t in self.itertuples(name=None)
+                )
+            elif object_dtype_cols:
+                is_object_dtype_by_index = [
+                    col in object_dtype_cols for col in self.columns
+                ]
+                return into_c(
+                    (
+                        t[0],
+                        {
+                            columns[i]: maybe_box_native(v)
+                            if is_object_dtype_by_index[i]
+                            else v
+                            for i, v in enumerate(t[1:])
+                        },
+                    )
+                    for t in self.itertuples(name=None)
+                )
+            else:
+                return into_c(
+                    (t[0], dict(zip(self.columns, t[1:])))
+                    for t in self.itertuples(name=None)
+                )
 
         else:
             raise ValueError(f"orient '{orient}' not understood")

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1771,7 +1771,13 @@ def to_dict(self, into=dict):
         """
         # GH16122
         into_c = com.standardize_mapping(into)
-        return into_c((k, maybe_box_native(v)) for k, v in self.items())
+
+        if is_object_dtype(self):
+            return into_c((k, maybe_box_native(v)) for k, v in self.items())
+        else:
+            # Not an object dtype => all values have the same type, so let the
+            # default indexer return native Python types
+            return into_c((k, v) for k, v in self.items())
 
     def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
         """

diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
@@ -380,6 +380,16 @@ def test_to_dict_orient_tight(self, index, columns):
                     "b": [float, float, float],
                 },
             ),
+            (  # Make sure we have one df in which all cols are object dtype
+                {
+                    "a": [1, "hello", 3],
+                    "b": [1.1, "world", 3.3],
+                },
+                {
+                    "a": [int, str, int],
+                    "b": [float, str, float],
+                },
+            ),
         ),
     )
     def test_to_dict_returns_native_types(self, orient, data, expected_types):