Skip to content

Commit 717f4dd

Browse files
Fix flavor param with incorrect type hint in read_html (#772)
* Fix flavor param with incorrect type hint in read_html
  refs:
  - pandas-dev/pandas#55059
  - pandas-dev/pandas#55076
* Add HTMLFlavors type to read_html
  ref: pandas-dev/pandas#55529
* Add tests and new dev dependencies
  Added:
  - tests to check the HTMLFlavors type in the read_html flavor arg;
  - beautifulsoup4 and html5lib as dev dependencies, since they are used by the respective flavors in read_html.
1 parent 2fefa8a commit 717f4dd

File tree

4 files changed

+15
-2
lines changed

4 files changed

+15
-2
lines changed

pandas-stubs/_typing.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,9 @@ ParseDatesArg: TypeAlias = (
587587
# read_xml parsers
588588
XMLParsers: TypeAlias = Literal["lxml", "etree"]
589589

590+
# read_html flavors
591+
HTMLFlavors: TypeAlias = Literal["lxml", "html5lib", "bs4"]
592+
590593
# Any plain Python or numpy function
591594
Function: TypeAlias = np.ufunc | Callable[..., Any]
592595
# Use a distinct HashableT in shared types to avoid conflicts with

pandas-stubs/io/html.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ from pandas._typing import (
2121
HashableT3,
2222
HashableT4,
2323
HashableT5,
24+
HTMLFlavors,
2425
ReadBuffer,
2526
StorageOptions,
2627
)
@@ -29,7 +30,7 @@ def read_html(
2930
io: FilePath | ReadBuffer[str],
3031
*,
3132
match: str | Pattern = ...,
32-
flavor: str | None = ...,
33+
flavor: HTMLFlavors | Sequence[HTMLFlavors] | None = ...,
3334
header: int | Sequence[int] | None = ...,
3435
index_col: int | Sequence[int] | list[HashableT1] | None = ...,
3536
skiprows: int | Sequence[int] | slice | None = ...,

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ pre-commit = ">=2.19.0"
4848
black = ">=23.3.0"
4949
isort = ">=5.12.0"
5050
openpyxl = ">=3.0.10"
51-
tables = { version = ">=3.8.0" , python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4
51+
tables = { version = ">=3.8.0", python = "<4"} # 3.8.0 depends on blosc2 which caps python to <4
5252
lxml = ">=4.9.1"
5353
pyreadstat = ">=1.2.0"
5454
xlrd = ">=2.0.1"
@@ -61,6 +61,9 @@ jinja2 = ">=3.1"
6161
scipy = { version = ">=1.9.1", python = "<3.13" }
6262
SQLAlchemy = ">=2.0.12"
6363
types-python-dateutil = ">=2.8.19"
64+
numexpr = "<2.8.5" # https://github.com/pandas-dev/pandas/issues/54449
65+
beautifulsoup4 = ">=4.12.2"
66+
html5lib = ">=1.1"
6467

6568
[build-system]
6669
requires = ["poetry-core>=1.0.0"]

tests/test_io.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,6 +1237,12 @@ def test_read_html():
12371237
with ensure_clean() as path:
12381238
check(assert_type(DF.to_html(path), None), type(None))
12391239
check(assert_type(read_html(path), list[DataFrame]), list)
1240+
check(assert_type(read_html(path, flavor=None), list[DataFrame]), list)
1241+
check(assert_type(read_html(path, flavor="bs4"), list[DataFrame]), list)
1242+
check(assert_type(read_html(path, flavor=["bs4"]), list[DataFrame]), list)
1243+
check(
1244+
assert_type(read_html(path, flavor=["bs4", "lxml"]), list[DataFrame]), list
1245+
)
12401246

12411247

12421248
def test_csv_quoting():

0 commit comments

Comments
 (0)