REF: de-duplicate freq pinning/validation #55987

Merged: 3 commits, Nov 16, 2023
52 changes: 42 additions & 10 deletions pandas/core/arrays/datetimelike.py
@@ -1938,7 +1938,7 @@ def __init__(
freq = values.freq
elif freq and values.freq:
freq = to_offset(freq)
freq, _ = validate_inferred_freq(freq, values.freq, False)
freq = _validate_inferred_freq(freq, values.freq)

if dtype is not None and dtype != values.dtype:
# TODO: we only have tests for this for DTA, not TDA (2022-07-01)
@@ -2025,6 +2025,39 @@ def freq(self, value) -> None:

self._freq = value

@final
def _maybe_pin_freq(self, freq, validate_kwds: dict):
"""
Constructor helper to pin the appropriate `freq` attribute. Assumes
that self._freq is currently set to any freq inferred in
_from_sequence_not_strict.
"""
if freq is None:
# user explicitly passed None -> override any inferred_freq
self._freq = None
elif freq == "infer":
# if self._freq is *not* None then we already inferred a freq
# and there is nothing left to do
if self._freq is None:
# Set _freq directly to bypass duplicative _validate_frequency
# check.
self._freq = to_offset(self.inferred_freq)
elif freq is lib.no_default:
# user did not specify anything, keep inferred freq if the original
# data had one, otherwise do nothing
pass
elif self._freq is None:
# We cannot inherit a freq from the data, so we need to validate
# the user-passed freq
freq = to_offset(freq)
type(self)._validate_frequency(self, freq, **validate_kwds)
self._freq = freq
else:
# Otherwise we just need to check that the user-passed freq
# doesn't conflict with the one we already have.
freq = to_offset(freq)
_validate_inferred_freq(freq, self._freq)

@final
@classmethod
def _validate_frequency(cls, index, freq: BaseOffset, **kwargs):
@@ -2353,7 +2386,9 @@ def _is_dates_only(self) -> bool:
# Shared Constructor Helpers


def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
def ensure_arraylike_for_datetimelike(
data, copy: bool, cls_name: str
) -> tuple[ArrayLike, bool]:
if not hasattr(data, "dtype"):
# e.g. list, tuple
if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
@@ -2426,9 +2461,9 @@ def validate_periods(periods: int | float | None) -> int | None:
return periods


def validate_inferred_freq(
freq, inferred_freq, freq_infer
) -> tuple[BaseOffset | None, bool]:
def _validate_inferred_freq(
freq: BaseOffset | None, inferred_freq: BaseOffset | None
) -> BaseOffset | None:
"""
If the user passes a freq and another freq is inferred from passed data,
require that they match.
@@ -2437,12 +2472,10 @@ def validate_inferred_freq(
----------
freq : DateOffset or None
inferred_freq : DateOffset or None
freq_infer : bool

Returns
-------
freq : DateOffset or None
freq_infer : bool

Notes
-----
@@ -2458,12 +2491,11 @@ def maybe_infer_freq(freq):
)
if freq is None:
freq = inferred_freq
freq_infer = False

return freq, freq_infer
return freq


def maybe_infer_freq(freq):
def maybe_infer_freq(freq) -> tuple[BaseOffset | None, bool]:
"""
Comparing a DateOffset to the string "infer" raises, so we need to
be careful about comparisons. Make a dummy variable `freq_infer` to
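Not part of the diff: a minimal, usage-level sketch of the four branches the new `_maybe_pin_freq` helper reconciles (user `freq` of None, "infer", not passed, or explicit), assuming pandas 2.x. The conflicting-freq case goes through `_validate_inferred_freq` / `_validate_frequency` as shown above.

```python
import pandas as pd

data = pd.date_range("2024-01-01", periods=4, freq="D")

# freq not passed: a freq carried by the input data is kept as-is.
assert pd.DatetimeIndex(data).freq == data.freq

# freq=None: explicitly drops any inherited/inferred freq.
assert pd.DatetimeIndex(data, freq=None).freq is None

# freq="infer": the freq is inferred from the values.
assert pd.DatetimeIndex(data, freq="infer").freq == data.freq

# An explicit freq that conflicts with the data raises ValueError.
try:
    pd.DatetimeIndex(data, freq="2D")
except ValueError as err:
    print(err)
```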
63 changes: 24 additions & 39 deletions pandas/core/arrays/datetimes.py
@@ -76,6 +76,7 @@
from collections.abc import Iterator

from pandas._typing import (
ArrayLike,
DateTimeErrorChoices,
DtypeObj,
IntervalClosedType,
@@ -327,13 +328,10 @@ def _from_sequence_not_strict(
dayfirst: bool = False,
yearfirst: bool = False,
ambiguous: TimeAmbiguous = "raise",
):
) -> Self:
"""
A non-strict version of _from_sequence, called from DatetimeIndex.__new__.
"""
explicit_none = freq is None
freq = freq if freq is not lib.no_default else None
freq, freq_infer = dtl.maybe_infer_freq(freq)

# if the user either explicitly passes tz=None or a tz-naive dtype, we
# disallow inferring a tz.
@@ -349,13 +347,16 @@

unit = None
if dtype is not None:
if isinstance(dtype, np.dtype):
unit = np.datetime_data(dtype)[0]
else:
# DatetimeTZDtype
unit = dtype.unit
unit = dtl.dtype_to_unit(dtype)

data, copy = dtl.ensure_arraylike_for_datetimelike(
data, copy, cls_name="DatetimeArray"
)
inferred_freq = None
if isinstance(data, DatetimeArray):
inferred_freq = data.freq

subarr, tz, inferred_freq = _sequence_to_dt64(
subarr, tz = _sequence_to_dt64(
data,
copy=copy,
tz=tz,
@@ -372,26 +373,15 @@
"Use obj.tz_localize(None) instead."
)

freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
if explicit_none:
freq = None

data_unit = np.datetime_data(subarr.dtype)[0]
data_dtype = tz_to_dtype(tz, data_unit)
result = cls._simple_new(subarr, freq=freq, dtype=data_dtype)
result = cls._simple_new(subarr, freq=inferred_freq, dtype=data_dtype)
if unit is not None and unit != result.unit:
# If unit was specified in user-passed dtype, cast to it here
result = result.as_unit(unit)

if inferred_freq is None and freq is not None:
# this condition precludes `freq_infer`
cls._validate_frequency(result, freq, ambiguous=ambiguous)

elif freq_infer:
# Set _freq directly to bypass duplicative _validate_frequency
# check.
result._freq = to_offset(result.inferred_freq)

validate_kwds = {"ambiguous": ambiguous}
result._maybe_pin_freq(freq, validate_kwds)
return result

# error: Signature of "_generate_range" incompatible with supertype
@@ -2180,7 +2170,7 @@ def std(


def _sequence_to_dt64(
data,
data: ArrayLike,
*,
copy: bool = False,
tz: tzinfo | None = None,
@@ -2192,7 +2182,8 @@
"""
Parameters
----------
data : list-like
data : np.ndarray or ExtensionArray
dtl.ensure_arraylike_for_datetimelike has already been called.
copy : bool, default False
tz : tzinfo or None, default None
dayfirst : bool, default False
@@ -2209,21 +2200,11 @@
Where `unit` is "ns" unless specified otherwise by `out_unit`.
tz : tzinfo or None
Either the user-provided tzinfo or one inferred from the data.
inferred_freq : Tick or None
The inferred frequency of the sequence.

Raises
------
TypeError : PeriodDType data is passed
"""
inferred_freq = None

data, copy = dtl.ensure_arraylike_for_datetimelike(
data, copy, cls_name="DatetimeArray"
)

if isinstance(data, DatetimeArray):
inferred_freq = data.freq

# By this point we are assured to have either a numpy array or Index
data, copy = maybe_convert_dtype(data, copy, tz=tz)
@@ -2236,6 +2217,7 @@
if data_dtype == object or is_string_dtype(data_dtype):
# TODO: We do not have tests specific to string-dtypes,
# also complex or categorical or other extension
data = cast(np.ndarray, data)
copy = False
if lib.infer_dtype(data, skipna=False) == "integer":
data = data.astype(np.int64)
@@ -2248,7 +2230,7 @@
yearfirst=yearfirst,
creso=abbrev_to_npy_unit(out_unit),
)
return result, tz, None
return result, tz
else:
converted, inferred_tz = objects_to_datetime64(
data,
@@ -2273,14 +2255,15 @@
result, _ = _construct_from_dt64_naive(
converted, tz=tz, copy=copy, ambiguous=ambiguous
)
return result, tz, None
return result, tz

data_dtype = data.dtype

# `data` may have originally been a Categorical[datetime64[ns, tz]],
# so we need to handle these types.
if isinstance(data_dtype, DatetimeTZDtype):
# DatetimeArray -> ndarray
data = cast(DatetimeArray, data)
tz = _maybe_infer_tz(tz, data.tz)
result = data._ndarray

@@ -2289,6 +2272,7 @@
if isinstance(data, DatetimeArray):
data = data._ndarray

data = cast(np.ndarray, data)
result, copy = _construct_from_dt64_naive(
data, tz=tz, copy=copy, ambiguous=ambiguous
)
@@ -2299,6 +2283,7 @@
if data.dtype != INT64_DTYPE:
data = data.astype(np.int64, copy=False)
copy = False
data = cast(np.ndarray, data)
result = data.view(out_dtype)

if copy:
Expand All @@ -2308,7 +2293,7 @@ def _sequence_to_dt64(
assert result.dtype.kind == "M"
assert result.dtype != "M8"
assert is_supported_unit(get_unit_from_dtype(result.dtype))
return result, tz, inferred_freq
return result, tz


def _construct_from_dt64_naive(
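Side note on the `unit = dtl.dtype_to_unit(dtype)` line above: it replaces the inline branch on numpy vs. pandas datetime dtypes. Roughly what that helper resolves, assuming pandas 2.x and numpy:

```python
import numpy as np
import pandas as pd

# A plain numpy datetime64 dtype carries its unit inside the dtype itself ...
assert np.datetime_data(np.dtype("datetime64[s]"))[0] == "s"

# ... while a tz-aware DatetimeTZDtype exposes it as an attribute.
assert pd.DatetimeTZDtype(tz="UTC", unit="ms").unit == "ms"
```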
29 changes: 5 additions & 24 deletions pandas/core/arrays/timedeltas.py
@@ -26,7 +26,6 @@
is_supported_unit,
npy_unit_to_abbrev,
periods_per_second,
to_offset,
)
from pandas._libs.tslibs.conversion import precision_from_unit
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
@@ -236,9 +235,7 @@ def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
if dtype:
dtype = _validate_td64_dtype(dtype)

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
freq = cast("Tick | None", freq)
data, freq = sequence_to_td64ns(data, copy=copy, unit=None)

if dtype is not None:
data = astype_overflowsafe(data, dtype=dtype, copy=False)
@@ -256,38 +253,22 @@ def _from_sequence_not_strict(
unit=None,
) -> Self:
"""
A non-strict version of _from_sequence, called from TimedeltaIndex.__new__.
_from_sequence_not_strict but without responsibility for finding the
result's `freq`.
"""
if dtype:
dtype = _validate_td64_dtype(dtype)

assert unit not in ["Y", "y", "M"] # caller is responsible for checking

explicit_none = freq is None
freq = freq if freq is not lib.no_default else None

freq, freq_infer = dtl.maybe_infer_freq(freq)

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
freq = cast("Tick | None", freq)
if explicit_none:
freq = None

if dtype is not None:
data = astype_overflowsafe(data, dtype=dtype, copy=False)

result = cls._simple_new(data, dtype=data.dtype, freq=freq)

if inferred_freq is None and freq is not None:
# this condition precludes `freq_infer`
cls._validate_frequency(result, freq)

elif freq_infer:
# Set _freq directly to bypass duplicative _validate_frequency
# check.
result._freq = to_offset(result.inferred_freq)
result = cls._simple_new(data, dtype=data.dtype, freq=inferred_freq)

result._maybe_pin_freq(freq, {})
return result

# Signature of "_generate_range" incompatible with supertype
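Not part of the diff: the timedelta constructor now routes through the same `_maybe_pin_freq` helper, so the user-facing behavior mirrors the datetime case. A minimal sketch, assuming pandas 2.x:

```python
import pandas as pd

tdi = pd.timedelta_range("1 day", periods=4, freq="D")

assert pd.TimedeltaIndex(tdi).freq == tdi.freq                # inherited
assert pd.TimedeltaIndex(tdi, freq=None).freq is None         # explicitly dropped
assert pd.TimedeltaIndex(tdi, freq="infer").freq == tdi.freq  # inferred
```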