-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
TYPING: some type hints for pandas\io\common.py #27598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
9421bb9
0e4a1d9
3abd219
0b96604
749237e
dac39b3
32caa9f
7502435
ab3f546
07980b9
9886585
0d6af90
2701be9
3c71335
171b03a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
import mmap | ||
import os | ||
import pathlib | ||
from typing import IO, BinaryIO, Optional, TextIO, Tuple, Type | ||
from urllib.error import URLError # noqa | ||
from urllib.parse import ( # noqa | ||
urlencode, | ||
|
@@ -32,6 +33,8 @@ | |
|
||
from pandas.core.dtypes.common import is_file_like | ||
|
||
from pandas._typing import FilePathOrBuffer | ||
|
||
# gh-12665: Alias for now and remove later. | ||
CParserError = ParserError | ||
|
||
|
@@ -68,14 +71,14 @@ class BaseIterator: | |
Useful only when the object being iterated is non-reusable (e.g. OK for a | ||
parser, not for an in-memory table, yes for its iterator).""" | ||
|
||
def __iter__(self): | ||
def __iter__(self) -> "BaseIterator": | ||
return self | ||
|
||
def __next__(self): | ||
raise AbstractMethodError(self) | ||
|
||
|
||
def _is_url(url): | ||
def _is_url(url) -> bool: | ||
"""Check to see if a URL has a valid protocol. | ||
|
||
Parameters | ||
|
@@ -93,7 +96,7 @@ def _is_url(url): | |
return False | ||
|
||
|
||
def _expand_user(filepath_or_buffer): | ||
def _expand_user(filepath_or_buffer: FilePathOrBuffer) -> FilePathOrBuffer: | ||
"""Return the argument with an initial component of ~ or ~user | ||
replaced by that user's home directory. | ||
|
||
|
@@ -111,7 +114,7 @@ def _expand_user(filepath_or_buffer): | |
return filepath_or_buffer | ||
|
||
|
||
def _validate_header_arg(header): | ||
def _validate_header_arg(header) -> None: | ||
if isinstance(header, bool): | ||
raise TypeError( | ||
"Passing a bool to header is invalid. " | ||
|
@@ -121,7 +124,7 @@ def _validate_header_arg(header): | |
) | ||
|
||
|
||
def _stringify_path(filepath_or_buffer): | ||
def _stringify_path(filepath_or_buffer: FilePathOrBuffer) -> FilePathOrBuffer: | ||
"""Attempt to convert a path-like object to a string. | ||
|
||
Parameters | ||
|
@@ -144,21 +147,22 @@ def _stringify_path(filepath_or_buffer): | |
strings, buffers, or anything else that's not even path-like. | ||
""" | ||
if hasattr(filepath_or_buffer, "__fspath__"): | ||
return filepath_or_buffer.__fspath__() | ||
# https://github.com/python/mypy/issues/1424 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When we drop 3.5 support do you think we can just do `isinstance(filepath_or_buffer, os.PathLike)` here instead? |
||
return filepath_or_buffer.__fspath__() # type: ignore | ||
elif isinstance(filepath_or_buffer, pathlib.Path): | ||
return str(filepath_or_buffer) | ||
return _expand_user(filepath_or_buffer) | ||
|
||
|
||
def is_s3_url(url): | ||
def is_s3_url(url) -> bool: | ||
"""Check for an s3, s3n, or s3a url""" | ||
try: | ||
return parse_url(url).scheme in ["s3", "s3n", "s3a"] | ||
except Exception: | ||
return False | ||
|
||
|
||
def is_gcs_url(url): | ||
def is_gcs_url(url) -> bool: | ||
"""Check for a gcs url""" | ||
try: | ||
return parse_url(url).scheme in ["gcs", "gs"] | ||
|
@@ -167,8 +171,11 @@ def is_gcs_url(url): | |
|
||
|
||
def get_filepath_or_buffer( | ||
filepath_or_buffer, encoding=None, compression=None, mode=None | ||
): | ||
filepath_or_buffer: FilePathOrBuffer, | ||
encoding: Optional[str] = None, | ||
compression: Optional[str] = None, | ||
mode: Optional[str] = None, | ||
) -> Tuple[FilePathOrBuffer, Optional[str], Optional[str], bool]: | ||
""" | ||
If the filepath_or_buffer is a url, translate and return the buffer. | ||
Otherwise passthrough. | ||
|
@@ -190,7 +197,7 @@ def get_filepath_or_buffer( | |
""" | ||
filepath_or_buffer = _stringify_path(filepath_or_buffer) | ||
|
||
if _is_url(filepath_or_buffer): | ||
if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer): | ||
req = urlopen(filepath_or_buffer) | ||
content_encoding = req.headers.get("Content-Encoding", None) | ||
if content_encoding == "gzip": | ||
|
@@ -224,7 +231,7 @@ def get_filepath_or_buffer( | |
return filepath_or_buffer, None, compression, False | ||
|
||
|
||
def file_path_to_url(path): | ||
def file_path_to_url(path: str) -> str: | ||
""" | ||
converts an absolute native path to a FILE URL. | ||
|
||
|
@@ -242,7 +249,9 @@ def file_path_to_url(path): | |
_compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"} | ||
|
||
|
||
def _infer_compression(filepath_or_buffer, compression): | ||
def _infer_compression( | ||
filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] | ||
) -> Optional[str]: | ||
""" | ||
Get the compression method for filepath_or_buffer. If compression='infer', | ||
the inferred compression method is returned. Otherwise, the input | ||
|
@@ -435,7 +444,13 @@ class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore | |
""" | ||
|
||
# GH 17778 | ||
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs): | ||
def __init__( | ||
self, | ||
file: FilePathOrBuffer, | ||
mode: str, | ||
compression: int = zipfile.ZIP_DEFLATED, | ||
**kwargs | ||
): | ||
if mode in ["wb", "rb"]: | ||
mode = mode.replace("b", "") | ||
super().__init__(file, mode, compression, **kwargs) | ||
|
@@ -461,16 +476,16 @@ class MMapWrapper(BaseIterator): | |
|
||
""" | ||
|
||
def __init__(self, f): | ||
def __init__(self, f: IO): | ||
self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) | ||
|
||
def __getattr__(self, name): | ||
def __getattr__(self, name: str): | ||
return getattr(self.mmap, name) | ||
|
||
def __iter__(self): | ||
def __iter__(self) -> "MMapWrapper": | ||
return self | ||
|
||
def __next__(self): | ||
def __next__(self) -> str: | ||
newline = self.mmap.readline() | ||
|
||
# readline returns bytes, not str, but Python's CSV reader | ||
|
@@ -491,16 +506,16 @@ class UTF8Recoder(BaseIterator): | |
Iterator that reads an encoded stream and re-encodes the input to UTF-8 | ||
""" | ||
|
||
def __init__(self, f, encoding): | ||
def __init__(self, f: BinaryIO, encoding: str): | ||
self.reader = codecs.getreader(encoding)(f) | ||
|
||
def read(self, bytes=-1): | ||
def read(self, bytes: int = -1) -> bytes: | ||
return self.reader.read(bytes).encode("utf-8") | ||
|
||
def readline(self): | ||
def readline(self) -> bytes: | ||
return self.reader.readline().encode("utf-8") | ||
|
||
def next(self): | ||
def next(self) -> bytes: | ||
return next(self.reader).encode("utf-8") | ||
|
||
|
||
|
@@ -511,5 +526,7 @@ def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): | |
return csv.reader(f, dialect=dialect, **kwds) | ||
|
||
|
||
def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds): | ||
def UnicodeWriter( | ||
f: TextIO, dialect: Type[csv.Dialect] = csv.excel, encoding: str = "utf-8", **kwds | ||
): | ||
return csv.writer(f, dialect=dialect, **kwds) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import re | ||
from shutil import get_terminal_size | ||
from typing import ( | ||
IO, | ||
TYPE_CHECKING, | ||
Any, | ||
Callable, | ||
|
@@ -730,6 +731,11 @@ def to_string(self) -> None: | |
""" | ||
Render a DataFrame to a console-friendly tabular output. | ||
""" | ||
# Note: the to_string method only accepts IO whereas to_html and | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the notes - are these bugs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are the sort of inconsistencies between to_html, to_string and to_latex that I was hoping to find by adding type hints. This should help with refactoring the three methods to share buffer handling code. Can't really call them bugs when the documentation doesn't say that filenames are accepted (even though they are for to_html and to_latex).
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know what the lifecycle of |
||
# to_latex accept FilePathOrBuffer, will raise | ||
# AttributeError: 'str' object has no attribute 'writelines' | ||
self.buf = cast(IO, self.buf) | ||
|
||
from pandas import Series | ||
|
||
frame = self.frame | ||
|
@@ -902,6 +908,9 @@ def to_html( | |
Klass = NotebookFormatter if notebook else HTMLFormatter | ||
html = Klass(self, classes=classes, border=border).render() | ||
if hasattr(self.buf, "write"): | ||
# Note: only TextIO is supported, a BytesIO object will raise | ||
# TypeError: a bytes-like object is required, not 'str' | ||
self.buf = cast(TextIO, self.buf) | ||
buffer_put_lines(self.buf, html) | ||
elif isinstance(self.buf, str): | ||
with open(self.buf, "w") as f: | ||
|
Uh oh!
There was an error while loading. Please reload this page.