TYPING: some type hints for pandas\io\common.py #27598

Merged
merged 15 commits on Aug 2, 2019
Changes from 4 commits
63 changes: 40 additions & 23 deletions pandas/io/common.py
@@ -10,6 +10,7 @@
import mmap
import os
import pathlib
from typing import IO, BinaryIO, Optional, TextIO, Tuple, Type
from urllib.error import URLError # noqa
from urllib.parse import ( # noqa
urlencode,
@@ -32,6 +33,8 @@

from pandas.core.dtypes.common import is_file_like

from pandas._typing import FilePathOrBuffer

# gh-12665: Alias for now and remove later.
CParserError = ParserError
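
For context, `FilePathOrBuffer` is the union alias from `pandas._typing` that most of the new hints use; at the time of this PR it is roughly the following (abridged sketch, not part of this diff):

```python
# pandas/_typing.py (abridged) -- the alias the new annotations rely on
from pathlib import Path
from typing import IO, AnyStr, Union

FilePathOrBuffer = Union[str, Path, IO[AnyStr]]
```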

@@ -68,14 +71,14 @@ class BaseIterator:
Useful only when the object being iterated is non-reusable (e.g. OK for a
parser, not for an in-memory table, yes for its iterator)."""

def __iter__(self):
def __iter__(self) -> "BaseIterator":
return self

def __next__(self):
raise AbstractMethodError(self)


def _is_url(url):
def _is_url(url) -> bool:
"""Check to see if a URL has a valid protocol.

Parameters
@@ -93,7 +96,7 @@ def _is_url(url):
return False


def _expand_user(filepath_or_buffer):
def _expand_user(filepath_or_buffer: FilePathOrBuffer) -> FilePathOrBuffer:
"""Return the argument with an initial component of ~ or ~user
replaced by that user's home directory.

@@ -111,7 +114,7 @@ def _expand_user(filepath_or_buffer):
return filepath_or_buffer


def _validate_header_arg(header):
def _validate_header_arg(header) -> None:
if isinstance(header, bool):
raise TypeError(
"Passing a bool to header is invalid. "
@@ -121,7 +124,7 @@ def _validate_header_arg(header):
)


def _stringify_path(filepath_or_buffer):
def _stringify_path(filepath_or_buffer: FilePathOrBuffer) -> FilePathOrBuffer:
"""Attempt to convert a path-like object to a string.

Parameters
@@ -144,21 +147,22 @@ def _stringify_path(filepath_or_buffer):
strings, buffers, or anything else that's not even path-like.
"""
if hasattr(filepath_or_buffer, "__fspath__"):
return filepath_or_buffer.__fspath__()
# https://github.com/python/mypy/issues/1424
Member
When we drop 3.5 support do you think we can just do `isinstance(filepath_or_buffer, os.PathLike)` here instead?

return filepath_or_buffer.__fspath__() # type: ignore
elif isinstance(filepath_or_buffer, pathlib.Path):
return str(filepath_or_buffer)
return _expand_user(filepath_or_buffer)
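
Regarding the review question above about `os.PathLike`: a minimal sketch of what `_stringify_path` could look like once Python 3.5 support is dropped (illustrative only, not part of this diff):

```python
import os

def _stringify_path(filepath_or_buffer):
    # os.PathLike / os.fspath (Python 3.6+) cover both __fspath__ objects
    # and pathlib.Path, removing the need for the mypy ignore above
    if isinstance(filepath_or_buffer, os.PathLike):
        return os.fspath(filepath_or_buffer)
    return _expand_user(filepath_or_buffer)
```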


def is_s3_url(url):
def is_s3_url(url) -> bool:
"""Check for an s3, s3n, or s3a url"""
try:
return parse_url(url).scheme in ["s3", "s3n", "s3a"]
except Exception:
return False


def is_gcs_url(url):
def is_gcs_url(url) -> bool:
"""Check for a gcs url"""
try:
return parse_url(url).scheme in ["gcs", "gs"]
@@ -167,8 +171,11 @@ def is_gcs_url(url):


def get_filepath_or_buffer(
filepath_or_buffer, encoding=None, compression=None, mode=None
):
filepath_or_buffer: FilePathOrBuffer,
encoding: Optional[str] = None,
compression: Optional[str] = None,
mode: Optional[str] = None,
) -> Tuple[FilePathOrBuffer, Optional[str], Optional[str], bool]:
"""
If the filepath_or_buffer is a url, translate and return the buffer.
Otherwise passthrough.
@@ -190,7 +197,7 @@ def get_filepath_or_buffer(
"""
filepath_or_buffer = _stringify_path(filepath_or_buffer)

if _is_url(filepath_or_buffer):
if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
req = urlopen(filepath_or_buffer)
content_encoding = req.headers.get("Content-Encoding", None)
if content_encoding == "gzip":
@@ -224,7 +231,7 @@ def get_filepath_or_buffer(
return filepath_or_buffer, None, compression, False
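
For reference, a hedged sketch of how a caller consumes the annotated 4-tuple; the pattern mirrors the `should_close` handling in pandas/io/parsers.py further down (the file name below is hypothetical):

```python
from pandas.io.common import get_filepath_or_buffer

# unpack the Tuple[FilePathOrBuffer, Optional[str], Optional[str], bool]
fp_or_buf, encoding, compression, should_close = get_filepath_or_buffer(
    "data.csv"  # hypothetical local path: returned unchanged, should_close=False
)
try:
    pass  # hand fp_or_buf to a reader/parser here
finally:
    if should_close:
        fp_or_buf.close()  # only close handles the function opened itself
```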


def file_path_to_url(path):
def file_path_to_url(path: str) -> str:
"""
converts an absolute native path to a FILE URL.

@@ -242,7 +249,9 @@ def file_path_to_url(path):
_compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"}


def _infer_compression(filepath_or_buffer, compression):
def _infer_compression(
filepath_or_buffer: FilePathOrBuffer, compression: Optional[str]
) -> Optional[str]:
"""
Get the compression method for filepath_or_buffer. If compression='infer',
the inferred compression method is returned. Otherwise, the input
@@ -435,7 +444,13 @@ class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore
"""

# GH 17778
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
def __init__(
self,
file: FilePathOrBuffer,
mode: str,
compression: int = zipfile.ZIP_DEFLATED,
**kwargs
):
if mode in ["wb", "rb"]:
mode = mode.replace("b", "")
super().__init__(file, mode, compression, **kwargs)
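
For context on the mode munging just above: `zipfile.ZipFile` only accepts text modes ("r", "w", "x", "a"), which is why the binary flag is stripped, and `zipfile.ZIP_DEFLATED` is a plain int constant, hence `compression: int`. A minimal standalone sketch:

```python
import io
import zipfile

buf = io.BytesIO()
# ZipFile rejects "wb", so BytesZipFile maps "wb" -> "w" before delegating
with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.writestr("data.csv", "a,b\n1,2\n")
print(buf.getvalue()[:2])  # b'PK' -- a valid zip archive header
```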
@@ -461,16 +476,16 @@ class MMapWrapper(BaseIterator):

"""

def __init__(self, f):
def __init__(self, f: IO):
self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

def __getattr__(self, name):
def __getattr__(self, name: str):
return getattr(self.mmap, name)

def __iter__(self):
def __iter__(self) -> "MMapWrapper":
return self

def __next__(self):
def __next__(self) -> str:
newline = self.mmap.readline()

# readline returns bytes, not str, but Python's CSV reader
@@ -491,16 +506,16 @@ class UTF8Recoder(BaseIterator):
Iterator that reads an encoded stream and re-encodes the input to UTF-8
"""

def __init__(self, f, encoding):
def __init__(self, f: BinaryIO, encoding: str):
self.reader = codecs.getreader(encoding)(f)

def read(self, bytes=-1):
def read(self, bytes: int = -1) -> bytes:
return self.reader.read(bytes).encode("utf-8")

def readline(self):
def readline(self) -> bytes:
return self.reader.readline().encode("utf-8")

def next(self):
def next(self) -> bytes:
return next(self.reader).encode("utf-8")
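
A small usage sketch of `UTF8Recoder` (assuming the class as defined above; the latin-1 payload is purely illustrative):

```python
import io

from pandas.io.common import UTF8Recoder

raw = io.BytesIO("café\n".encode("latin-1"))   # non-UTF-8 byte stream
recoder = UTF8Recoder(raw, encoding="latin-1")
print(recoder.readline())                      # b'caf\xc3\xa9\n' (UTF-8 bytes)
```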


@@ -511,5 +526,7 @@ def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
return csv.reader(f, dialect=dialect, **kwds)


def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
def UnicodeWriter(
f: TextIO, dialect: Type[csv.Dialect] = csv.excel, encoding: str = "utf-8", **kwds
):
return csv.writer(f, dialect=dialect, **kwds)
9 changes: 9 additions & 0 deletions pandas/io/formats/format.py
@@ -8,6 +8,7 @@
import re
from shutil import get_terminal_size
from typing import (
IO,
TYPE_CHECKING,
Any,
Callable,
@@ -730,6 +731,11 @@ def to_string(self) -> None:
"""
Render a DataFrame to a console-friendly tabular output.
"""
# Note: the to_string method only accepts IO whereas to_html and

Member
Thanks for the notes - are these bugs?

Member Author
These are the sort of inconsistencies between to_html, to_string and to_latex that I was hoping to find by adding type hints.

This should help with refactoring the three methods to share buffer handling code.

Can't really call them bugs when the documentation doesn't say that filenames are accepted (even though they are for to_html and to_latex):

    buf : StringIO-like, optional
        Buffer to write to.

Member
Do you know what the lifecycle of self.buf = cast(...) is? Would that cast still just stay local to the function?

# to_latex accept FilePathOrBuffer, will raise
# AttributeError: 'str' object has no attribute 'writelines'
self.buf = cast(IO, self.buf)

from pandas import Series

frame = self.frame
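
On the cast lifecycle question in the thread above: `typing.cast` is purely a static-typing construct; at runtime it returns its argument unchanged, so the narrowed type is only visible to mypy from that assignment onward inside the method. A minimal sketch:

```python
import io
from typing import IO, cast

buf = io.StringIO()
narrowed = cast(IO, buf)   # runtime no-op: cast just returns its argument
assert narrowed is buf     # same object; only the static type changes
```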
@@ -902,6 +908,9 @@ def to_html(
Klass = NotebookFormatter if notebook else HTMLFormatter
html = Klass(self, classes=classes, border=border).render()
if hasattr(self.buf, "write"):
# Note: only TextIO is supported, a BytesIO object will raise
# TypeError: a bytes-like object is required, not 'str'
self.buf = cast(TextIO, self.buf)
buffer_put_lines(self.buf, html)
elif isinstance(self.buf, str):
with open(self.buf, "w") as f:
4 changes: 3 additions & 1 deletion pandas/io/parsers.py
@@ -460,7 +460,9 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):

if should_close:
try:
fp_or_buf.close()
# error: Item "str" of "Union[str, Path, IO[Any]]" has no attribute "close"
# error: Item "Path" of "Union[str, Path, IO[Any]]" has no attribute "close"
fp_or_buf.close() # type: ignore
except ValueError:
pass
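
An alternative some codebases use instead of the `# type: ignore` above is to narrow the union before calling `close()`; a hedged sketch with a hypothetical helper (not what this PR does):

```python
import pathlib
from typing import IO, Union


def _maybe_close(fp_or_buf: Union[str, pathlib.Path, IO], should_close: bool) -> None:
    # isinstance narrowing lets mypy prove only the IO branch reaches .close()
    if should_close and not isinstance(fp_or_buf, (str, pathlib.Path)):
        try:
            fp_or_buf.close()
        except ValueError:
            pass
```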
