
Change internal types in individual files to be private. Use TypeAlias in code where types are declared. #61504

Open · wants to merge 2 commits into main
15 changes: 8 additions & 7 deletions pandas/core/apply.py
@@ -10,6 +10,7 @@
    TYPE_CHECKING,
    Any,
    Literal,
+    TypeAlias,
    cast,
)

@@ -71,7 +72,7 @@
    from pandas.core.resample import Resampler
    from pandas.core.window.rolling import BaseWindow

-ResType = dict[int, Any]
+_ResType: TypeAlias = dict[int, Any]
Member:

Is the idea of making these lead with a starting _ that they are marked as "private"? Should they instead just be moved to _typing.py with the other defined "private" annotations?

Contributor (author):

> Is the idea of making these lead with a starting _ that they are marked as "private"? Should they instead just be moved to _typing.py with the other defined "private" annotations?

The difference with the annotations in _typing.py is that many (but not all) of those are used in more than one pandas module. The ones marked private in this PR are only used locally within their own module.

I'm also going to be doing a PR to make more of the ones in _typing.py public.
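
For illustration, a minimal sketch of the convention under discussion (the summarize helper is hypothetical, not part of this PR): an alias used by only one module is declared locally with a leading underscore, while aliases shared across modules stay public in pandas/_typing.py.

from typing import Any, TypeAlias

# Module-local alias: the leading underscore marks it private to this file.
_ResType: TypeAlias = dict[int, Any]

def summarize(results: _ResType) -> int:
    # Hypothetical consumer of the alias, for illustration only.
    return len(results)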

Member:
We were planning on moving core to _core, but ran into some bikeshedding issues. I think we should still operate under the assumption we will eventually do that (I believe there is consensus that we really want to do it). Assuming that is done, what will we want to do about private variables like this? I think we should strive for consistency, either all with a leading underscore or none.



class BaseExecutionEngine(abc.ABC):
@@ -934,7 +935,7 @@ def validate_values_for_numba(self) -> None:

    @abc.abstractmethod
    def wrap_results_for_axis(
-        self, results: ResType, res_index: Index
+        self, results: _ResType, res_index: Index
    ) -> DataFrame | Series:
        pass

@@ -1163,7 +1164,7 @@ def apply_standard(self):
        # wrap results
        return self.wrap_results(results, res_index)

-    def apply_series_generator(self) -> tuple[ResType, Index]:
+    def apply_series_generator(self) -> tuple[_ResType, Index]:
        assert callable(self.func)

        series_gen = self.series_generator
@@ -1193,7 +1194,7 @@ def apply_series_numba(self):
        results = self.apply_with_numba()
        return results, self.result_index

-    def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series:
+    def wrap_results(self, results: _ResType, res_index: Index) -> DataFrame | Series:
        from pandas import Series

        # see if we can infer the results
@@ -1289,7 +1290,7 @@ def result_columns(self) -> Index:
        return self.index

    def wrap_results_for_axis(
-        self, results: ResType, res_index: Index
+        self, results: _ResType, res_index: Index
    ) -> DataFrame | Series:
        """return the results for the rows"""

@@ -1433,7 +1434,7 @@ def result_columns(self) -> Index:
        return self.columns

    def wrap_results_for_axis(
-        self, results: ResType, res_index: Index
+        self, results: _ResType, res_index: Index
    ) -> DataFrame | Series:
        """return the results for the columns"""
        result: DataFrame | Series
@@ -1453,7 +1454,7 @@ def wrap_results_for_axis(

        return result

-    def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
+    def infer_to_same_shape(self, results: _ResType, res_index: Index) -> DataFrame:
        """infer the results to the same shape as the input object"""
        result = self.obj._constructor(data=results)
        result = result.T
16 changes: 9 additions & 7 deletions pandas/core/arrays/datetimelike.py
@@ -10,6 +10,7 @@
    TYPE_CHECKING,
    Any,
    Literal,
+    TypeAlias,
    Union,
    cast,
    final,
@@ -161,7 +162,8 @@
        TimedeltaArray,
    )

-DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]
+# Trailing underscore because of rule PYI043: private type aliases should not end in "T"
+_DTScalarOrNaT_: TypeAlias = DatetimeLikeScalar | NaTType


def _make_unpacked_invalid_op(op_name: str):
@@ -236,7 +238,7 @@ def _scalar_type(self) -> type[DatetimeLikeScalar]:
        """
        raise AbstractMethodError(self)

-    def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
+    def _scalar_from_string(self, value: str) -> _DTScalarOrNaT_:
"""
Construct a scalar type from a string.

@@ -257,7 +259,7 @@ def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
        raise AbstractMethodError(self)

    def _unbox_scalar(
-        self, value: DTScalarOrNaT
+        self, value: _DTScalarOrNaT_
    ) -> np.int64 | np.datetime64 | np.timedelta64:
"""
Unbox the integer value of a scalar `value`.
@@ -279,7 +281,7 @@ def _unbox_scalar(
"""
raise AbstractMethodError(self)

-    def _check_compatible_with(self, other: DTScalarOrNaT) -> None:
+    def _check_compatible_with(self, other: _DTScalarOrNaT_) -> None:
"""
Verify that `self` and `other` are compatible.

@@ -370,23 +372,23 @@ def __array__(
return self._ndarray

@overload
-    def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ...
+    def __getitem__(self, key: ScalarIndexer) -> _DTScalarOrNaT_: ...

@overload
def __getitem__(
self,
key: SequenceIndexer | PositionalIndexerTuple,
) -> Self: ...

-    def __getitem__(self, key: PositionalIndexer2D) -> Self | DTScalarOrNaT:
+    def __getitem__(self, key: PositionalIndexer2D) -> Self | _DTScalarOrNaT_:
"""
This getitem defers to the underlying array, which by-definition can
only handle list-likes, slices, and integer scalars
"""
# Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
# but skip evaluating the Union at runtime for performance
# (see https://github.com/pandas-dev/pandas/pull/44624)
-        result = cast("Union[Self, DTScalarOrNaT]", super().__getitem__(key))
+        result = cast(Union[Self, _DTScalarOrNaT_], super().__getitem__(key))
if lib.is_scalar(result):
return result
else:
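
A note on the trailing underscore above: flake8-pyi rule PYI043 flags private type aliases whose names end in "T", because that suffix conventionally signals a TypeVar. A minimal sketch of the workaround (the stand-in union is illustrative only, not the real alias):

from typing import TypeAlias

# PYI043 would flag a private alias named _DTScalarOrNaT (it ends in "T").
# A trailing underscore keeps the alias private without the "T" suffix:
_DTScalarOrNaT_: TypeAlias = int | float  # stand-in for the real union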
36 changes: 19 additions & 17 deletions pandas/core/arrays/interval.py
@@ -9,7 +9,7 @@
from typing import (
    TYPE_CHECKING,
    Literal,
-    Union,
+    TypeAlias,
    overload,
)

@@ -109,8 +109,8 @@
)


-IntervalSide = Union[TimeArrayLike, np.ndarray]
-IntervalOrNA = Union[Interval, float]
+_IntervalSide: TypeAlias = TimeArrayLike | np.ndarray
+_IntervalOrNA: TypeAlias = Interval | float

_interval_shared_docs: dict[str, str] = {}

@@ -216,8 +216,8 @@ def ndim(self) -> Literal[1]:
        return 1

    # To make mypy recognize the fields
-    _left: IntervalSide
-    _right: IntervalSide
+    _left: _IntervalSide
+    _right: _IntervalSide
    _dtype: IntervalDtype

    # ---------------------------------------------------------------------
@@ -234,8 +234,8 @@ def __new__(
        data = extract_array(data, extract_numpy=True)

        if isinstance(data, cls):
-            left: IntervalSide = data._left
-            right: IntervalSide = data._right
+            left: _IntervalSide = data._left
+            right: _IntervalSide = data._right
            closed = closed or data.closed
            dtype = IntervalDtype(left.dtype, closed=closed)
        else:
@@ -277,8 +277,8 @@ def __new__(
    @classmethod
    def _simple_new(
        cls,
-        left: IntervalSide,
-        right: IntervalSide,
+        left: _IntervalSide,
+        right: _IntervalSide,
        dtype: IntervalDtype,
    ) -> Self:
        result = IntervalMixin.__new__(cls)
@@ -296,7 +296,7 @@ def _ensure_simple_new_inputs(
        closed: IntervalClosedType | None = None,
        copy: bool = False,
        dtype: Dtype | None = None,
-    ) -> tuple[IntervalSide, IntervalSide, IntervalDtype]:
+    ) -> tuple[_IntervalSide, _IntervalSide, IntervalDtype]:
        """Ensure correctness of input parameters for cls._simple_new."""
        from pandas.core.indexes.base import ensure_index
@@ -704,12 +704,12 @@ def __len__(self) -> int:
        return len(self._left)

    @overload
-    def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA: ...
+    def __getitem__(self, key: ScalarIndexer) -> _IntervalOrNA: ...

    @overload
    def __getitem__(self, key: SequenceIndexer) -> Self: ...

-    def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
+    def __getitem__(self, key: PositionalIndexer) -> Self | _IntervalOrNA:
        key = check_array_indexer(self, key)
        left = self._left[key]
        right = self._right[key]
@@ -858,7 +858,7 @@ def argsort(
            ascending=ascending, kind=kind, na_position=na_position, **kwargs
        )

-    def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
+    def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> _IntervalOrNA:
        nv.validate_minmax_axis(axis, self.ndim)

        if not len(self):
@@ -875,7 +875,7 @@ def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
        indexer = obj.argsort()[0]
        return obj[indexer]

-    def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
+    def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> _IntervalOrNA:
        nv.validate_minmax_axis(axis, self.ndim)

        if not len(self):
@@ -1016,8 +1016,10 @@ def _concat_same_type(cls, to_concat: Sequence[IntervalArray]) -> Self:
            raise ValueError("Intervals must all be closed on the same side.")
        closed = closed_set.pop()

-        left: IntervalSide = np.concatenate([interval.left for interval in to_concat])
-        right: IntervalSide = np.concatenate([interval.right for interval in to_concat])
+        left: _IntervalSide = np.concatenate([interval.left for interval in to_concat])
+        right: _IntervalSide = np.concatenate(
+            [interval.right for interval in to_concat]
+        )

        left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed)

@@ -1952,7 +1954,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
        return isin(self.astype(object), values.astype(object))

    @property
-    def _combined(self) -> IntervalSide:
+    def _combined(self) -> _IntervalSide:
        # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
        # has no attribute "reshape" [union-attr]
        left = self.left._values.reshape(-1, 1)  # type: ignore[union-attr]
4 changes: 0 additions & 4 deletions pandas/core/arrays/string_arrow.py
@@ -4,7 +4,6 @@
import re
from typing import (
    TYPE_CHECKING,
-    Union,
)
import warnings

@@ -63,9 +62,6 @@
    from pandas import Series


-ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
-
-
def _chk_pyarrow_available() -> None:
    if pa_version_under10p1:
        msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray."
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
@@ -17,8 +17,8 @@
    Any,
    Literal,
    NamedTuple,
+    TypeAlias,
    TypeVar,
-    Union,
    cast,
)
import warnings
@@ -102,7 +102,7 @@
    from pandas.core.generic import NDFrame

# TODO(typing) the return value on this callable should be any *scalar*.
-AggScalar = Union[str, Callable[..., Any]]
+_AggScalar: TypeAlias = str | Callable[..., Any]
# TODO: validate types on ScalarResult and move to _typing
# Blocked from using by https://github.com/python/mypy/issues/1484
# See note at _mangle_lambda_list
@@ -141,7 +141,7 @@ class NamedAgg(NamedTuple):
    """

    column: Hashable
-    aggfunc: AggScalar
+    aggfunc: _AggScalar


@set_module("pandas.api.typing")
20 changes: 10 additions & 10 deletions pandas/core/groupby/groupby.py
@@ -26,6 +26,7 @@ class providing the base-class of operations.
from typing import (
    TYPE_CHECKING,
    Literal,
+    TypeAlias,
    TypeVar,
    Union,
    cast,
@@ -449,13 +450,13 @@ def f(self):
    return attr


-_KeysArgType = Union[
-    Hashable,
-    list[Hashable],
-    Callable[[Hashable], Hashable],
-    list[Callable[[Hashable], Hashable]],
-    Mapping[Hashable, Hashable],
-]
+_KeysArgType: TypeAlias = (
+    Hashable
+    | list[Hashable]
+    | Callable[[Hashable], Hashable]
+    | list[Callable[[Hashable], Hashable]]
+    | Mapping[Hashable, Hashable]
+)


class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
@@ -957,9 +958,8 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]:
        level = self.level
        result = self._grouper.get_iterator(self._selected_obj)
        # mypy: Argument 1 to "len" has incompatible type "Hashable"; expected "Sized"
-        if (
-            (is_list_like(level) and len(level) == 1)  # type: ignore[arg-type]
-            or (isinstance(keys, list) and len(keys) == 1)
+        if (is_list_like(level) and len(level) == 1) or (  # type: ignore[arg-type]
+            isinstance(keys, list) and len(keys) == 1
        ):
            # GH#42795 - when keys is a list, return tuples even when length is 1
            result = (((key,), group) for key, group in result)
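
A note on the parenthesized form of _KeysArgType above: when a PEP 604 union written with "|" spans multiple lines, the right-hand side has to be wrapped in parentheses to parse, whereas the old Union[...] form was already grouped by its brackets. A runnable sketch of the pattern:

from collections.abc import Callable, Hashable, Mapping
from typing import TypeAlias

# Parentheses let the union continue across lines; a bare trailing "|"
# would otherwise be a syntax error.
_KeysArgType: TypeAlias = (
    Hashable
    | list[Hashable]
    | Callable[[Hashable], Hashable]
    | list[Callable[[Hashable], Hashable]]
    | Mapping[Hashable, Hashable]
)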
3 changes: 1 addition & 2 deletions pandas/core/indexing.py
@@ -5,7 +5,6 @@
from typing import (
    TYPE_CHECKING,
    Any,
-    TypeVar,
    cast,
    final,
)
@@ -83,6 +82,7 @@
        Axis,
        AxisInt,
        Self,
+        T,
        npt,
    )

@@ -91,7 +91,6 @@
        Series,
    )

-T = TypeVar("T")
# "null slice"
_NS = slice(None, None)
_one_ellipsis_message = "indexer may only contain one '...' entry"
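
The removed local TypeVar is replaced by importing the shared T that pandas/_typing.py already defines, via the TYPE_CHECKING import block shown above. Assuming the module uses "from __future__ import annotations" (as pandas/core/indexing.py does), annotations are never evaluated at runtime, so the import can stay type-checking-only. A minimal sketch of the pattern; the first_item function is hypothetical:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only type checkers need the shared TypeVar; deferred annotation
    # evaluation means it is never looked up at runtime.
    from pandas._typing import T

def first_item(values: list[T]) -> T:
    # Hypothetical example of annotating with the shared TypeVar.
    return values[0]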