Skip to content

GH-130798: Add type hints to pathlib.types #131639

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 57 additions & 36 deletions Lib/pathlib/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@
from glob import _PathGlobber
from pathlib._os import magic_open, ensure_distinct_paths, ensure_different_files, copyfileobj
from pathlib import PurePath, Path
from typing import Optional, Protocol, runtime_checkable
from typing import (
Any, BinaryIO, Callable, Generator, Iterator, Literal, Optional, Protocol, Self, Sequence, TypeVar,
runtime_checkable,
)

_WP = TypeVar("_WP", bound="_WritablePath")

def _explode_path(path, split):

def _explode_path(path: str, split: Callable[[str], tuple[str, str]]) -> tuple[str, list[str]]:
"""
Split the path into a 2-tuple (anchor, parts), where *anchor* is the
uppermost parent of the path (equivalent to path.parents[-1]), and
Expand Down Expand Up @@ -70,38 +75,38 @@ class _JoinablePath(ABC):

@property
@abstractmethod
def parser(self):
def parser(self) -> _PathParser:
"""Implementation of pathlib._types.Parser used for low-level path
parsing and manipulation.
"""
raise NotImplementedError

@abstractmethod
def with_segments(self, *pathsegments):
def with_segments(self, *pathsegments: str) -> Self:
"""Construct a new path object from any number of path-like objects.
Subclasses may override this method to customize how new path objects
are created from methods like `iterdir()`.
"""
raise NotImplementedError

@abstractmethod
def __str__(self):
def __str__(self) -> str:
"""Return the string representation of the path, suitable for
passing to system calls."""
raise NotImplementedError

@property
def anchor(self):
def anchor(self) -> str:
"""The concatenation of the drive and root, or ''."""
return _explode_path(str(self), self.parser.split)[0]

@property
def name(self):
def name(self) -> str:
"""The final path component, if any."""
return self.parser.split(str(self))[1]

@property
def suffix(self):
def suffix(self) -> str:
"""
The final component's last suffix, if any.

Expand All @@ -110,7 +115,7 @@ def suffix(self):
return self.parser.splitext(self.name)[1]

@property
def suffixes(self):
def suffixes(self) -> Sequence[str]:
"""
A list of the final component's suffixes, if any.

Expand All @@ -125,11 +130,11 @@ def suffixes(self):
return suffixes[::-1]

@property
def stem(self):
def stem(self) -> str:
"""The final path component, minus its last suffix."""
return self.parser.splitext(self.name)[0]

def with_name(self, name):
def with_name(self, name: str) -> Self:
"""Return a new path with the file name changed."""
split = self.parser.split
if split(name)[0]:
Expand All @@ -138,7 +143,7 @@ def with_name(self, name):
path = path.removesuffix(split(path)[1]) + name
return self.with_segments(path)

def with_stem(self, stem):
def with_stem(self, stem: str) -> Self:
"""Return a new path with the stem changed."""
suffix = self.suffix
if not suffix:
Expand All @@ -149,7 +154,7 @@ def with_stem(self, stem):
else:
return self.with_name(stem + suffix)

def with_suffix(self, suffix):
def with_suffix(self, suffix: str) -> Self:
"""Return a new path with the file suffix changed. If the path
has no suffix, add given suffix. If the given suffix is an empty
string, remove the suffix from the path.
Expand All @@ -164,36 +169,36 @@ def with_suffix(self, suffix):
return self.with_name(stem + suffix)

@property
def parts(self):
def parts(self) -> Sequence[str]:
"""An object providing sequence-like access to the
components in the filesystem path."""
anchor, parts = _explode_path(str(self), self.parser.split)
if anchor:
parts.append(anchor)
return tuple(reversed(parts))

def joinpath(self, *pathsegments):
def joinpath(self, *pathsegments: str) -> Self:
"""Combine this path with one or several arguments, and return a
new path representing either a subpath (if all arguments are relative
paths) or a totally different path (if one of the arguments is
anchored).
"""
return self.with_segments(str(self), *pathsegments)

def __truediv__(self, key):
def __truediv__(self, key: str) -> Self:
try:
return self.with_segments(str(self), key)
except TypeError:
return NotImplemented

def __rtruediv__(self, key):
def __rtruediv__(self, key: str) -> Self:
try:
return self.with_segments(key, str(self))
except TypeError:
return NotImplemented

@property
def parent(self):
def parent(self) -> Self:
"""The logical parent of the path."""
path = str(self)
parent = self.parser.split(path)[0]
Expand All @@ -202,7 +207,7 @@ def parent(self):
return self

@property
def parents(self):
def parents(self) -> Sequence[Self]:
"""A sequence of this path's logical parents."""
split = self.parser.split
path = str(self)
Expand All @@ -214,7 +219,7 @@ def parents(self):
parent = split(path)[0]
return tuple(parents)

def full_match(self, pattern):
def full_match(self, pattern: str) -> bool:
"""
Return True if this path matches the given glob-style pattern. The
pattern is matched against the entire path.
Expand All @@ -236,45 +241,50 @@ class _ReadablePath(_JoinablePath):

@property
@abstractmethod
def info(self):
def info(self) -> PathInfo:
"""
A PathInfo object that exposes the file type and other file attributes
of this path.
"""
raise NotImplementedError

@abstractmethod
def __open_rb__(self, buffering=-1):
def __open_rb__(self, buffering: int = -1) -> BinaryIO:
"""
Open the file pointed to by this path for reading in binary mode and
return a file object, like open(mode='rb').
"""
raise NotImplementedError

def read_bytes(self):
def read_bytes(self) -> bytes:
"""
Open the file in bytes mode, read it, and close the file.
"""
with magic_open(self, mode='rb', buffering=0) as f:
return f.read()

def read_text(self, encoding=None, errors=None, newline=None):
def read_text(
self,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
) -> str:
"""
Open the file in text mode, read it, and close the file.
"""
with magic_open(self, mode='r', encoding=encoding, errors=errors, newline=newline) as f:
return f.read()

@abstractmethod
def iterdir(self):
def iterdir(self) -> Iterator[Self]:
"""Yield path objects of the directory contents.

The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
raise NotImplementedError

def glob(self, pattern, *, recurse_symlinks=True):
def glob(self, pattern: str, *, recurse_symlinks: Literal[True] = True) -> Iterator[Self]:
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
Expand All @@ -290,7 +300,12 @@ def glob(self, pattern, *, recurse_symlinks=True):
select = globber.selector(parts)
return select(self.joinpath(''))

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
def walk(
self,
top_down: bool = True,
on_error: Optional[Callable[[Exception], None]] = None,
follow_symlinks: bool = False,
) -> Generator[tuple[Self, list[str], list[str]]]:
"""Walk the directory tree from this directory, similar to os.walk()."""
paths = [self]
while paths:
Expand Down Expand Up @@ -322,21 +337,21 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
paths += [path.joinpath(d) for d in reversed(dirnames)]

@abstractmethod
def readlink(self):
def readlink(self) -> Self:
"""
Return the path to which the symbolic link points.
"""
raise NotImplementedError

def copy(self, target, **kwargs):
def copy(self, target: _WP, **kwargs: Any) -> _WP:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to use a ParamSpec/similar here to show that the keyword arguments are passed to target._copy_from()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I spent quite a bit of time now trying to get this to work, but I believe the way _ReadablePath.copy and _WritablePath._copy_from are coupled is currently not representable in the Python type system.

Be aware, I am not an expert in this, so all conclusions I draw here should be confirmed with someone who is.

Ideally the type checker would just be able to infer when keyword arguments are not compatible with the signature of a detected subclass's _WritablePath._copy_from. But all my attempts have failed so far. Basically, the minimal scenario that needs to be supported is:

class R:
    def copy[T: W](self, target: T, **kwargs) -> T:
        target._copy_from(self, **kwargs)
        return target

class W:
    def _copy_from(self, source: R, /, *, follow_symlinks: Literal[True] = True) -> None:
        return
  1. In this example: "not typing the kwargs" (or typing them as Any) means that incorrect keyword arguments go undetected by the type checker: https://mypy-play.net/?mypy=latest&python=3.13&gist=7608e8dcd5e4b58dc2fc2c355bf6ed89

  2. Trying to use TypedDicts to define the kwargs doesn't work either because the definitions would have to be provided to _ReadablePath.copy for each specific target class: https://mypy-play.net/?mypy=latest&python=3.13&gist=12f4cbdd725073d8a152a90a941922cf

  3. ParamSpec looks pretty promising but to be able to actually bind the parameters, you need to go via a protocol class, which prevents you from reusing the actual target type as return type of the _ReadablePath.copy method. Moreover ParamSpec can't be used without binding P.args to the function signature: https://mypy-play.net/?mypy=latest&python=3.13&gist=b47ea834a0f57a722b8124f76f3fb6d4

Ideally we'd be able to do something like:

  class R:
      def copy[T: W, **P](self, target: T[P], **kwargs: P.kwargs) -> T[P]: ...
  class W[**P]:
      def _copy_from(self, source: R, **kwargs: P.kwargs) -> None: ...

But from reading through a bunch of docs and issues, this seems impossible. I believe the correct related issue here should be python/typing#548 for support of higher-kinded typevars, and we'd also need kwargs-only support for ParamSpec: python/typing#1524

My guess is that by redesigning the coupling between _ReadablePath.copy and _WritablePath._copy_from it should be possible to better represent this in the type system. But it's getting late here, and I think I'll revisit this with a fresh mind tomorrow.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intersection types might also work, since then you could do the following:

def copy[T: W, **P](self, target: T & SupportsCopyFrom[P], *args: P.args, **kwargs: P.kwargs) -> T: ...

This would mean that target must be both a T and SupportsCopyFrom simultaneously. Intersections I think are being considered more seriously than HKT, but they're also another quite substantial addition to the type system.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the analysis! Feels like we're pushing the current type system a bit far, so I'm happy with Any. I could log an issue in pathlib-abc and link back to this thread.

I'm also happy to change how copy() and _copy_from() interact if there's something better we can do.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I have a solution, though it's awkward. First change is to pass *args too, meaning ParamSpec works. Second is to require _copy_to() to redundantly return self. Then we could do the following:

class _HasCopyTo[**Args, R](Protocol):
    def _copy_to(
        self, path: ReadablePath, /, 
        *args: Args.args, **kwargs: Args.kwargs) -> R: ...

class ReadablePath:
    def copy[**Args, R](
        self, path: _HasCopyTo[Args, R], /, 
        *args: Args.args, **kwargs: Args.kwargs,
    ) -> R:
        res = path._copy_to(self, *args, **kwargs)
        assert path is res, "Must return self"
        return res

class WritablePath(ReadablePath):
    @abstractmethod
    def _copy_to(self, path: ReadablePath, /, *args: Any, **kwargs: Any) -> Self: ...

class Path(WritablePath):
    def _copy_to(self, path: ReadablePath, /, some_option: bool) -> Self:
        do_copy(self, path)
        return self

The Any in WritablePath is required to allow subclasses to require any arguments, but it also (correctly?) means if you call copy() with something that's just WritablePath it'll accept anything.

Copy link
Contributor Author

@ap-- ap-- Mar 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does! I'll make the change:

https://mypy-play.net/?mypy=latest&python=3.11&gist=8c300324c700eea4ffa7b7776a460f6e

code
from typing import Literal
from typing import Protocol
from typing import Generic
from typing import TypeVar
from typing import ParamSpec
from typing import Callable
from typing import Self
from typing import ClassVar
from typing import Any

R = TypeVar("R", bound="_WritablePath", covariant=True)
P = ParamSpec("P")

class _HasCopyFrom(Protocol[R, P]):
    def _copy_from(self, source: _ReadablePath, /, *args: P.args, **kwargs: P.kwargs) -> R:
        ...

class _ReadablePath:
    def copy(self, path: _HasCopyFrom[R, P], /, *args: P.args, **kwargs: P.kwargs) -> R:
        return path._copy_from(self, *args, **kwargs)

class _WritablePath:
    def _copy_from(self, path: _ReadablePath, /, *args: Any, **kwargs: Any) -> Self: ...

class MyPathR(_ReadablePath):
    pass

class MyPathW0(_WritablePath):
    def _copy_from(self, path: _ReadablePath, /, *, some_option: bool = True) -> Self:
        return self

class MyPathW1(_WritablePath):
    def _copy_from(self, path: _ReadablePath, /, *, other_option: int = 3) -> Self:
        return self

r = MyPathR()       
w0 = MyPathW0()
w1 = MyPathW1()
 
# correct       
reveal_type(r.copy(w0))
r.copy(w0, some_option=False)
reveal_type(r.copy(w1))
r.copy(w1, other_option=4)

# error
r.copy(w0, dont_exist=2)
r.copy(w1, text="str")
r.copy(w0, some_option=1.0)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, I tried implementing the changes, and they don't fully work. While they solve the copy (*args and) **kwargs issue, they cause type checking issues in, for example, copy_into, because mypy can't infer the actual type of path, but only that path has the structural type _HasCopyFrom. This is a limitation of how the protocol is expressed (see here: https://mypy-play.net/?mypy=latest&python=3.12&gist=c744090b69aad82d75c14e6ab7dedb4c ). So it seems intersection types would be required to do this correctly.

Also on a side note: I now wonder if follow_symlinks should be an explicit keyword argument in copy and copy_into since it's mandatory in _WritablePath._copy_from's default implementation.

Additionally, forcing _copy_from to return Self would interfere with #131636

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed the issues - you need to be returning R from _copy_from, and _HasCopyFrom.__truediv__/joinpath needs to return _HasCopyFrom, not just R. That was causing it to discard the paramspec and specific return value. For #131636, you can still do -> Generator[tuple[...], None, Self].

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but the problem imo is that both joinpath and __truediv__ should actually return the same type. The workaround is nice, but to make it work, one needs to be typed as returning R because it has to return the WritablePath instance and the other as the HasCopyFrom protocol because it's passed again into .copy ...

So in the end you have to adjust the protocol to the exact implementation of copy and copy_into, which is unfortunate. If joinpath were used in both copy and copy_into, once for the return value and once for the target argument passed to copy, the typing workaround would not work.

I'm not sure if the benefit of being able to type check the copy kwargs justifies the introduction of this trick.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with @ap--. We could always revisit this later. Thank you both for your work on this, it's been really valuable.

"""
Recursively copy this file or directory tree to the given destination.
"""
ensure_distinct_paths(self, target)
target._copy_from(self, **kwargs)
return target.joinpath() # Empty join to ensure fresh metadata.

def copy_into(self, target_dir, **kwargs):
def copy_into(self, target_dir: _WP, **kwargs: Any) -> _WP:
"""
Copy this file or directory tree into the given existing directory.
"""
Expand All @@ -356,29 +371,29 @@ class _WritablePath(_JoinablePath):
__slots__ = ()

@abstractmethod
def symlink_to(self, target, target_is_directory=False):
def symlink_to(self, target: str, target_is_directory: bool = False) -> None:
"""
Make this path a symlink pointing to the target path.
Note the order of arguments (link, target) is the reverse of os.symlink.
"""
raise NotImplementedError

@abstractmethod
def mkdir(self):
def mkdir(self) -> None:
"""
Create a new directory at this given path.
"""
raise NotImplementedError

@abstractmethod
def __open_wb__(self, buffering=-1):
def __open_wb__(self, buffering: int = -1) -> BinaryIO:
"""
Open the file pointed to by this path for writing in binary mode and
return a file object, like open(mode='wb').
"""
raise NotImplementedError

def write_bytes(self, data):
def write_bytes(self, data: bytes) -> int:
"""
Open the file in bytes mode, write to it, and close the file.
"""
Expand All @@ -387,7 +402,13 @@ def write_bytes(self, data):
with magic_open(self, mode='wb') as f:
return f.write(view)

def write_text(self, data, encoding=None, errors=None, newline=None):
def write_text(
self,
data: str,
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
) -> int:
"""
Open the file in text mode, write to it, and close the file.
"""
Expand All @@ -397,7 +418,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
with magic_open(self, mode='w', encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)

def _copy_from(self, source, follow_symlinks=True):
def _copy_from(self, source: _ReadablePath, follow_symlinks: bool = True) -> None:
"""
Recursively copy the given path to this path.
"""
Expand Down
Loading