Skip to content

CLN: read_html global importing check #51505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 3 additions & 38 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,33 +53,6 @@
if TYPE_CHECKING:
from pandas import DataFrame

# Module-level state for the lazy optional-dependency probe; _importers()
# fills these in the first time it runs.
_IMPORTS = False  # set to True once _importers() has completed
_HAS_BS4 = False  # result of the "bs4" import probe
_HAS_LXML = False  # result of the "lxml.etree" import probe
_HAS_HTML5LIB = False  # result of the "html5lib" import probe


def _importers() -> None:
    """
    Probe the optional HTML parsing libraries the first time this is called.

    Each of the module-level flags ``_HAS_BS4``, ``_HAS_LXML`` and
    ``_HAS_HTML5LIB`` is set to whether
    ``import_optional_dependency(..., errors="ignore")`` returned something
    other than ``None`` for the corresponding module. ``_IMPORTS`` is then
    set so that subsequent calls return immediately without probing again.
    """
    global _IMPORTS, _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB

    # The probe only needs to run once per module load.
    if _IMPORTS:
        return

    _HAS_BS4 = import_optional_dependency("bs4", errors="ignore") is not None
    _HAS_LXML = import_optional_dependency("lxml.etree", errors="ignore") is not None
    _HAS_HTML5LIB = import_optional_dependency("html5lib", errors="ignore") is not None

    # Mark the probe as done only after every flag has been assigned.
    _IMPORTS = True


#############
# READ HTML #
#############
Expand Down Expand Up @@ -922,16 +895,10 @@ def _parser_dispatch(flavor: str | None) -> type[_HtmlFrameParser]:
)

if flavor in ("bs4", "html5lib"):
if not _HAS_HTML5LIB:
raise ImportError("html5lib not found, please install it")
if not _HAS_BS4:
raise ImportError("BeautifulSoup4 (bs4) not found, please install it")
# Although we call this above, we want to raise here right before use.
bs4 = import_optional_dependency("bs4") # noqa:F841

import_optional_dependency("html5lib")
import_optional_dependency("bs4")
else:
if not _HAS_LXML:
raise ImportError("lxml not found, please install it")
import_optional_dependency("lxml.etree")
return _valid_parsers[flavor]


Expand Down Expand Up @@ -1194,8 +1161,6 @@ def read_html(
See the :ref:`read_html documentation in the IO section of the docs
<io.read_html>` for some examples of reading in HTML tables.
"""
_importers()

# Type check here. We don't want to parse only to fail because of an
# invalid value of an integer skiprows.
if isinstance(skiprows, numbers.Integral) and skiprows < 0:
Expand Down
6 changes: 1 addition & 5 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from functools import partial
from importlib import reload
from io import (
BytesIO,
StringIO,
Expand All @@ -26,6 +25,7 @@
Timestamp,
date_range,
read_csv,
read_html,
to_datetime,
)
import pandas._testing as tm
Expand All @@ -36,7 +36,6 @@

from pandas.io.common import file_path_to_url
import pandas.io.html
from pandas.io.html import read_html


@pytest.fixture(
Expand Down Expand Up @@ -1350,9 +1349,6 @@ def run(self):
else:
self.err = None

# force import check by reinitialising global vars in html.py
reload(pandas.io.html)

filename = datapath("io", "data", "html", "valid_markup.html")
helper_thread1 = ErrorThread(target=self.read_html, args=(filename,))
helper_thread2 = ErrorThread(target=self.read_html, args=(filename,))
Expand Down