Skip to content

PERF: lazify IO imports #52421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 115 additions & 65 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=undefined-all-variable
from __future__ import annotations

__docformat__ = "restructuredtext"
Expand Down Expand Up @@ -113,8 +114,6 @@
from pandas.tseries.api import infer_freq
from pandas.tseries import offsets

from pandas.core.computation.api import eval

from pandas.core.reshape.api import (
concat,
lreshape,
Expand All @@ -133,43 +132,8 @@
)

from pandas import api, arrays, errors, io, plotting, tseries
from pandas import testing
from pandas.util._print_versions import show_versions

from pandas.io.api import (
# excel
ExcelFile,
ExcelWriter,
read_excel,
# parsers
read_csv,
read_fwf,
read_table,
# pickle
read_pickle,
to_pickle,
# pytables
HDFStore,
read_hdf,
# sql
read_sql,
read_sql_query,
read_sql_table,
# misc
read_clipboard,
read_parquet,
read_orc,
read_feather,
read_gbq,
read_html,
read_xml,
read_json,
read_stata,
read_sas,
read_spss,
)

from pandas.io.json._normalize import json_normalize

from pandas.util._tester import test

Expand All @@ -182,6 +146,92 @@
del get_versions, v


def __getattr__(key: str):
    """
    Resolve a lazily provided attribute of the top-level ``pandas`` namespace.

    The imports backing ``eval``, ``testing``, ``json_normalize`` and the
    ``pandas.io.api`` readers/writers are performed here, on attribute
    access, to speed up 'import pandas as pd'.

    Parameters
    ----------
    key : str
        Name of the attribute being looked up on the ``pandas`` module.

    Returns
    -------
    object
        The function, class or module registered under ``key``.

    Raises
    ------
    AttributeError
        If ``key`` is not one of the lazily provided names.
    """
    if key in {
        # excel
        "ExcelFile",
        "ExcelWriter",
        "read_excel",
        # parsers
        "read_csv",
        "read_fwf",
        "read_table",
        # pickle
        "read_pickle",
        "to_pickle",
        # pytables
        "HDFStore",
        "read_hdf",
        # sql
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        # misc
        "read_clipboard",
        "read_parquet",
        "read_orc",
        "read_feather",
        "read_gbq",
        "read_html",
        "read_xml",
        "read_json",
        "read_stata",
        "read_sas",
        "read_spss",
    }:
        # Workaround to avoid false-positive in "inconsistent-namespace-usage"
        # complaining "Found both 'pandas.io' and 'io' "
        from importlib import import_module

        return getattr(import_module(".io.api", package="pandas"), key)

    if key == "json_normalize":
        from pandas.io.json._normalize import json_normalize as normalize

        return normalize

    if key == "eval":
        from pandas.core.computation.api import eval as lazy_eval

        return lazy_eval

    if key == "testing":
        import pandas.testing  # pylint: disable=redefined-outer-name

        return pandas.testing

    # Anything else is genuinely unknown to the top-level namespace.
    raise AttributeError(f"module 'pandas' has no attribute '{key}'")


def __dir__() -> list[str]:
    """
    List the attribute names of the top-level ``pandas`` namespace.

    Combines the names currently bound in the module globals with the names
    that the module-level ``__getattr__`` provides lazily, so the lazy ones
    still show up in ``dir(pandas)``.

    Returns
    -------
    list of str
        Sorted attribute names without duplicates.
    """
    lazy_names = {
        # resolved through pandas.io.api
        "ExcelFile",
        "ExcelWriter",
        "read_excel",
        "read_csv",
        "read_fwf",
        "read_table",
        "read_pickle",
        "to_pickle",
        "HDFStore",
        "read_hdf",
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        "read_clipboard",
        "read_parquet",
        "read_orc",
        "read_feather",
        "read_gbq",
        "read_html",
        "read_xml",
        "read_json",
        "read_stata",
        "read_sas",
        "read_spss",
        # resolved individually
        "eval",
        "json_normalize",
        "testing",
    }
    # Merge as sets instead of concatenating lists: a lazy name may already
    # be bound in the globals (importing ``pandas.testing`` binds ``testing``
    # on the package), and concatenation would then list it twice.
    return sorted(lazy_names.union(globals()))


# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
Expand Down Expand Up @@ -226,7 +276,7 @@
# Use __all__ to let type checkers know what is part of the public API.
# Pandas is not (yet) a py.typed library: the public API is determined
# based on the documentation.
__all__ = [
__all__ = [ # pyright: ignore[reportUnsupportedDunderAll]
"ArrowDtype",
"BooleanDtype",
"Categorical",
Expand All @@ -236,13 +286,13 @@
"DateOffset",
"DatetimeIndex",
"DatetimeTZDtype",
"ExcelFile",
"ExcelWriter",
"ExcelFile", # pyright: ignore[reportUnsupportedDunderAll]
"ExcelWriter", # pyright: ignore[reportUnsupportedDunderAll]
"Flags",
"Float32Dtype",
"Float64Dtype",
"Grouper",
"HDFStore",
"HDFStore", # pyright: ignore[reportUnsupportedDunderAll]
"Index",
"IndexSlice",
"Int16Dtype",
Expand Down Expand Up @@ -280,7 +330,7 @@
"date_range",
"describe_option",
"errors",
"eval",
"eval", # pyright: ignore[reportUnsupportedDunderAll]
"factorize",
"get_dummies",
"from_dummies",
Expand All @@ -290,7 +340,7 @@
"io",
"isna",
"isnull",
"json_normalize",
"json_normalize", # pyright: ignore[reportUnsupportedDunderAll]
"lreshape",
"melt",
"merge",
Expand All @@ -306,36 +356,36 @@
"pivot_table",
"plotting",
"qcut",
"read_clipboard",
"read_csv",
"read_excel",
"read_feather",
"read_fwf",
"read_gbq",
"read_hdf",
"read_html",
"read_json",
"read_orc",
"read_parquet",
"read_pickle",
"read_sas",
"read_spss",
"read_sql",
"read_sql_query",
"read_sql_table",
"read_stata",
"read_table",
"read_xml",
"read_clipboard", # pyright: ignore[reportUnsupportedDunderAll]
"read_csv", # pyright: ignore[reportUnsupportedDunderAll]
"read_excel", # pyright: ignore[reportUnsupportedDunderAll]
"read_feather", # pyright: ignore[reportUnsupportedDunderAll]
"read_fwf", # pyright: ignore[reportUnsupportedDunderAll]
"read_gbq", # pyright: ignore[reportUnsupportedDunderAll]
"read_hdf", # pyright: ignore[reportUnsupportedDunderAll]
"read_html", # pyright: ignore[reportUnsupportedDunderAll]
"read_json", # pyright: ignore[reportUnsupportedDunderAll]
"read_orc", # pyright: ignore[reportUnsupportedDunderAll]
"read_parquet", # pyright: ignore[reportUnsupportedDunderAll]
"read_pickle", # pyright: ignore[reportUnsupportedDunderAll]
"read_sas", # pyright: ignore[reportUnsupportedDunderAll]
"read_spss", # pyright: ignore[reportUnsupportedDunderAll]
"read_sql", # pyright: ignore[reportUnsupportedDunderAll]
"read_sql_query", # pyright: ignore[reportUnsupportedDunderAll]
"read_sql_table", # pyright: ignore[reportUnsupportedDunderAll]
"read_stata", # pyright: ignore[reportUnsupportedDunderAll]
"read_table", # pyright: ignore[reportUnsupportedDunderAll]
"read_xml", # pyright: ignore[reportUnsupportedDunderAll]
"reset_option",
"set_eng_float_format",
"set_option",
"show_versions",
"test",
"testing",
"testing", # pyright: ignore[reportUnsupportedDunderAll]
"timedelta_range",
"to_datetime",
"to_numeric",
"to_pickle",
"to_pickle", # pyright: ignore[reportUnsupportedDunderAll]
"to_timedelta",
"tseries",
"unique",
Expand Down
21 changes: 18 additions & 3 deletions pandas/api/interchange/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
# pylint: disable=undefined-all-variable
"""
Public API for DataFrame interchange protocol.
"""

from pandas.core.interchange.dataframe_protocol import DataFrame
from pandas.core.interchange.from_dataframe import from_dataframe

__all__ = ["from_dataframe", "DataFrame"]
def __getattr__(key: str):
    """
    Resolve a lazily provided attribute of ``pandas.api.interchange``.

    Parameters
    ----------
    key : str
        Name of the attribute being looked up on this module.

    Returns
    -------
    object
        ``DataFrame`` or ``from_dataframe``, imported on access.

    Raises
    ------
    AttributeError
        If ``key`` is not one of the lazily provided names.
    """
    # lazy imports to speed up 'import pandas as pd'
    if key == "DataFrame":
        from pandas.core.interchange.dataframe_protocol import DataFrame

        return DataFrame
    if key == "from_dataframe":
        from pandas.core.interchange.from_dataframe import from_dataframe

        return from_dataframe
    # Same wording as the regular "module has no attribute" error.
    raise AttributeError(f"module 'pandas.api.interchange' has no attribute '{key}'")


def __dir__() -> "list[str]":
    """
    List the attribute names of ``pandas.api.interchange``.

    Returns
    -------
    list of str
        Sorted, duplicate-free names: everything bound in the module globals
        plus the two names that ``__getattr__`` resolves lazily.
    """
    return sorted({"DataFrame", "from_dataframe"}.union(globals()))


# Public names of this module. Both are resolved on demand by the module-level
# ``__getattr__`` instead of being bound at import time; the per-entry pyright
# suppressions are presumably needed because a static checker cannot see a
# definition for them here.
__all__ = [
    "from_dataframe",  # pyright: ignore[reportUnsupportedDunderAll]
    "DataFrame",  # pyright: ignore[reportUnsupportedDunderAll]
]
29 changes: 25 additions & 4 deletions pandas/api/typing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# pylint: disable=undefined-all-variable
"""
Public API classes that store intermediate results useful for type-hinting.
"""
from __future__ import annotations

from pandas.core.groupby import (
DataFrameGroupBy,
Expand All @@ -25,8 +27,27 @@

# TODO: Can't import Styler without importing jinja2
# from pandas.io.formats.style import Styler
from pandas.io.json._json import JsonReader
from pandas.io.stata import StataReader


def __getattr__(key: str):
    """
    Resolve a lazily provided attribute of ``pandas.api.typing``.

    Parameters
    ----------
    key : str
        Name of the attribute being looked up on this module.

    Returns
    -------
    type
        ``JsonReader`` or ``StataReader``, imported on access.

    Raises
    ------
    AttributeError
        If ``key`` is neither of the two lazily provided names.
    """
    # Reject unknown names up front so no import is attempted for them.
    if key not in ("JsonReader", "StataReader"):
        raise AttributeError(f"module 'pandas.api.typing' has no attribute '{key}'")

    if key == "StataReader":
        from pandas.io.stata import StataReader as reader_cls
    else:
        from pandas.io.json._json import JsonReader as reader_cls
    return reader_cls


def __dir__() -> list[str]:
    """
    List the attribute names of ``pandas.api.typing``.

    Returns
    -------
    list of str
        The names bound in the module globals together with the two names
        served lazily by ``__getattr__``, in sorted order.
    """
    return sorted([*globals(), "JsonReader", "StataReader"])


__all__ = [
"DataFrameGroupBy",
Expand All @@ -35,13 +56,13 @@
"ExpandingGroupby",
"ExponentialMovingWindow",
"ExponentialMovingWindowGroupby",
"JsonReader",
"JsonReader", # pyright: ignore[reportUnsupportedDunderAll]
"PeriodIndexResamplerGroupby",
"Resampler",
"Rolling",
"RollingGroupby",
"SeriesGroupBy",
"StataReader",
"StataReader", # pyright: ignore[reportUnsupportedDunderAll]
# See TODO above
# "Styler",
"TimedeltaIndexResamplerGroupby",
Expand Down
Loading