REF: lazify relativedelta imports #52659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged · 4 commits · May 3, 2023
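
The change applied throughout this PR: module-level imports of dateutil's relativedelta and easter are moved into the functions that use them, so the import cost is only paid on first use. A rough sketch of the pattern with a hypothetical function (not code from the PR):

def shift_to_weekday(dt, weekday):
    # Deferred import: dateutil.relativedelta is loaded the first time this
    # function runs rather than when the enclosing module is imported.
    from dateutil.relativedelta import relativedelta

    return dt + relativedelta(weekday=weekday)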
Changes from 1 commit
9 changes: 7 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
@@ -15,8 +15,6 @@ from cpython.datetime cimport (

import_datetime()

from dateutil.easter import easter
from dateutil.relativedelta import relativedelta
import numpy as np

cimport numpy as cnp
@@ -348,6 +346,8 @@ cdef _determine_offset(kwds):
kwds_no_nanos["microseconds"] = kwds_no_nanos.get("microseconds", 0) + micro

if all(k in kwds_use_relativedelta for k in kwds_no_nanos):
from dateutil.relativedelta import relativedelta

return relativedelta(**kwds_no_nanos), True

raise ValueError(
@@ -3672,6 +3672,8 @@ cdef class Easter(SingleConstructorOffset):

@apply_wraps
def _apply(self, other: datetime) -> datetime:
from dateutil.easter import easter

current_easter = easter(other.year)
current_easter = datetime(
current_easter.year, current_easter.month, current_easter.day
@@ -3702,6 +3704,9 @@
def is_on_offset(self, dt: datetime) -> bool:
if self.normalize and not _is_normalized(dt):
return False

from dateutil.easter import easter

return date(dt.year, dt.month, dt.day) == easter(dt.year)


4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/parsing.pyx
@@ -46,7 +46,6 @@ from dateutil.parser import (
DEFAULTPARSER,
parse as du_parse,
)
from dateutil.relativedelta import relativedelta
from dateutil.tz import (
tzlocal as _dateutil_tzlocal,
tzoffset,
@@ -692,6 +691,9 @@ cdef datetime dateutil_parse(
) from err

if res.weekday is not None and not res.day:
assert False
Member Author

Whoops, this wasn't supposed to make it into the PR. It looks like we don't reach this branch in the tests, and that's a good thing, because L696 should just be relativedelta(...), not relativedelta.relativedelta(...).

Member Author

Should we just deprecate this case, since it isn't reached and would raise if it ever were?

Member

This branch is always false because res.day is never 0, right?

Member Author

It's just never reached in the tests. pd.Timestamp("2023 Sept Thu") will hit this branch and raise AttributeError: type object 'relativedelta' has no attribute 'relativedelta' on main.

Member

Ah, interesting. I thought res was always a datetime.datetime object here. (This also doesn't make sense, since res.weekday() should be passed instead.)

What's the result if we remove this branch? If it's sensible, we can remove it and call it a bug fix.

Member Author

ret is a datetime object here; res is a parser.parser._result object.

If we disable this check entirely, then pd.Timestamp("2023 Sept Thu") behaves like Timestamp("2023-09-01"), but I'm pretty sure that if I ran it tomorrow it would give 2023-09-02. Rather than skip this block entirely, I'd prefer to change it to raise intentionally.

Member

Okay, I think raising is reasonable, as this format is a bit ambiguous.
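
A minimal, self-contained sketch (not part of the PR) of the failure mode described in this thread, assuming dateutil is installed; the TH weekday constant is used purely for illustration:

from datetime import datetime

from dateutil.relativedelta import TH, relativedelta

ret = datetime(2023, 9, 1)

# What main currently does: `relativedelta` here is already the class, so the
# attribute lookup fails with
# AttributeError: type object 'relativedelta' has no attribute 'relativedelta'
try:
    ret + relativedelta.relativedelta(weekday=TH)
except AttributeError as err:
    print(err)

# The intended call passes the weekday to the class itself:
print(ret + relativedelta(weekday=TH))  # 2023-09-07, the next Thursday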

from dateutil.relativedelta import relativedelta

ret = ret + relativedelta.relativedelta(weekday=res.weekday)
if not ignoretz:
if res.tzname and res.tzname in time.tzname:
57 changes: 28 additions & 29 deletions pandas/io/stata.py
@@ -12,7 +12,10 @@
from __future__ import annotations

from collections import abc
import datetime
from datetime import (
datetime,
timedelta,
)
from io import BytesIO
import os
import struct
@@ -30,7 +33,6 @@
)
import warnings

from dateutil.relativedelta import relativedelta
import numpy as np

from pandas._libs.lib import infer_dtype
@@ -226,7 +228,7 @@
_date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"]


stata_epoch: Final = datetime.datetime(1960, 1, 1)
stata_epoch: Final = datetime(1960, 1, 1)


# TODO: Add typing. As of January 2020 it is not possible to type this function since
@@ -279,8 +281,8 @@ def _stata_elapsed_date_to_datetime_vec(dates, fmt) -> Series:
years since 0000
"""
MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year
MAX_DAY_DELTA = (Timestamp.max - datetime.datetime(1960, 1, 1)).days
MIN_DAY_DELTA = (Timestamp.min - datetime.datetime(1960, 1, 1)).days
MAX_DAY_DELTA = (Timestamp.max - datetime(1960, 1, 1)).days
MIN_DAY_DELTA = (Timestamp.min - datetime(1960, 1, 1)).days
MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000
MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000

@@ -295,9 +297,7 @@ def convert_year_month_safe(year, month) -> Series:
return to_datetime(100 * year + month, format="%Y%m")
else:
index = getattr(year, "index", None)
return Series(
[datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index
)
return Series([datetime(y, m, 1) for y, m in zip(year, month)], index=index)

def convert_year_days_safe(year, days) -> Series:
"""
@@ -309,8 +309,7 @@ def convert_year_days_safe(year, days) -> Series:
else:
index = getattr(year, "index", None)
value = [
datetime.datetime(y, 1, 1) + relativedelta(days=int(d))
for y, d in zip(year, days)
datetime(y, 1, 1) + timedelta(days=int(d)) for y, d in zip(year, days)
]
return Series(value, index=index)

@@ -323,12 +322,12 @@ def convert_delta_safe(base, deltas, unit) -> Series:
index = getattr(deltas, "index", None)
if unit == "d":
if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA:
values = [base + relativedelta(days=int(d)) for d in deltas]
values = [base + timedelta(days=int(d)) for d in deltas]
return Series(values, index=index)
elif unit == "ms":
if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA:
values = [
base + relativedelta(microseconds=(int(d) * 1000)) for d in deltas
base + timedelta(microseconds=(int(d) * 1000)) for d in deltas
]
return Series(values, index=index)
else:
@@ -405,7 +404,7 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
Parameters
----------
dates : Series
Series or array containing datetime.datetime or datetime64[ns] to
Series or array containing datetime or datetime64[ns] to
convert to the Stata Internal Format given by fmt
fmt : str
The format to convert to. Can be, tc, td, tw, tm, tq, th, ty
@@ -436,7 +435,7 @@ def parse_dates_safe(
if delta:
delta = dates._values - stata_epoch

def f(x: datetime.timedelta) -> float:
def f(x: timedelta) -> float:
return US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds

v = np.vectorize(f)
@@ -447,15 +446,15 @@ def f(x: datetime.timedelta) -> float:
d["month"] = year_month._values - d["year"] * 100
if days:

def g(x: datetime.datetime) -> int:
return (x - datetime.datetime(x.year, 1, 1)).days
def g(x: datetime) -> int:
return (x - datetime(x.year, 1, 1)).days

v = np.vectorize(g)
d["days"] = v(dates)
else:
raise ValueError(
"Columns containing dates must contain either "
"datetime64, datetime.datetime or null values."
"datetime64, datetime or null values."
)

return DataFrame(d, index=index)
@@ -2293,7 +2292,7 @@ class StataWriter(StataParser):
* If datetimes contain timezone information
ValueError
* Columns listed in convert_dates are neither datetime64[ns]
or datetime.datetime
or datetime
* Column dtype is not representable in Stata
* Column listed in convert_dates is not in DataFrame
* Categorical label contains more than 32,000 characters
@@ -2326,7 +2325,7 @@ def __init__(
convert_dates: dict[Hashable, str] | None = None,
write_index: bool = True,
byteorder: str | None = None,
time_stamp: datetime.datetime | None = None,
time_stamp: datetime | None = None,
data_label: str | None = None,
variable_labels: dict[Hashable, str] | None = None,
compression: CompressionOptions = "infer",
@@ -2766,7 +2765,7 @@ def _write_value_labels(self) -> None:
def _write_header(
self,
data_label: str | None = None,
time_stamp: datetime.datetime | None = None,
time_stamp: datetime | None = None,
) -> None:
byteorder = self._byteorder
# ds_format - just use 114
@@ -2791,8 +2790,8 @@
# time stamp, 18 bytes, char, null terminated
# format dd Mon yyyy hh:mm
if time_stamp is None:
time_stamp = datetime.datetime.now()
elif not isinstance(time_stamp, datetime.datetime):
time_stamp = datetime.now()
elif not isinstance(time_stamp, datetime):
raise ValueError("time_stamp should be datetime type")
# GH #13856
# Avoid locale-specific month conversion
@@ -3218,7 +3217,7 @@ class StataWriter117(StataWriter):
* If datetimes contain timezone information
ValueError
* Columns listed in convert_dates are neither datetime64[ns]
or datetime.datetime
or datetime
* Column dtype is not representable in Stata
* Column listed in convert_dates is not in DataFrame
* Categorical label contains more than 32,000 characters
@@ -3254,7 +3253,7 @@ def __init__(
convert_dates: dict[Hashable, str] | None = None,
write_index: bool = True,
byteorder: str | None = None,
time_stamp: datetime.datetime | None = None,
time_stamp: datetime | None = None,
data_label: str | None = None,
variable_labels: dict[Hashable, str] | None = None,
convert_strl: Sequence[Hashable] | None = None,
@@ -3299,7 +3298,7 @@ def _update_map(self, tag: str) -> None:
def _write_header(
self,
data_label: str | None = None,
time_stamp: datetime.datetime | None = None,
time_stamp: datetime | None = None,
) -> None:
"""Write the file header"""
byteorder = self._byteorder
@@ -3325,8 +3324,8 @@
# time stamp, 18 bytes, char, null terminated
# format dd Mon yyyy hh:mm
if time_stamp is None:
time_stamp = datetime.datetime.now()
elif not isinstance(time_stamp, datetime.datetime):
time_stamp = datetime.now()
elif not isinstance(time_stamp, datetime):
raise ValueError("time_stamp should be datetime type")
# Avoid locale-specific month conversion
months = [
@@ -3610,7 +3609,7 @@ class StataWriterUTF8(StataWriter117):
* If datetimes contain timezone information
ValueError
* Columns listed in convert_dates are neither datetime64[ns]
or datetime.datetime
or datetime
* Column dtype is not representable in Stata
* Column listed in convert_dates is not in DataFrame
* Categorical label contains more than 32,000 characters
@@ -3647,7 +3646,7 @@ def __init__(
convert_dates: dict[Hashable, str] | None = None,
write_index: bool = True,
byteorder: str | None = None,
time_stamp: datetime.datetime | None = None,
time_stamp: datetime | None = None,
data_label: str | None = None,
variable_labels: dict[Hashable, str] | None = None,
convert_strl: Sequence[Hashable] | None = None,
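In pandas/io/stata.py the dateutil dependency is dropped rather than deferred: the offsets there only involve whole days and microseconds, for which the standard-library timedelta is interchangeable with relativedelta. A small illustrative check of that equivalence (the values are arbitrary, not from the PR):

from datetime import datetime, timedelta

from dateutil.relativedelta import relativedelta

base = datetime(1960, 1, 1)  # the Stata epoch used in stata.py

# For plain integer day and microsecond offsets the two agree exactly.
assert base + timedelta(days=123) == base + relativedelta(days=123)
assert base + timedelta(microseconds=456_000) == base + relativedelta(microseconds=456_000)
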
3 changes: 2 additions & 1 deletion pandas/plotting/_matplotlib/converter.py
@@ -16,7 +16,6 @@
cast,
)

from dateutil.relativedelta import relativedelta
import matplotlib.dates as mdates
from matplotlib.ticker import (
AutoLocator,
@@ -349,6 +348,8 @@ def __init__(self, locator, tz=None, defaultfmt: str = "%Y-%m-%d") -> None:
class PandasAutoDateLocator(mdates.AutoDateLocator):
def get_locator(self, dmin, dmax):
"""Pick the best locator based on a distance."""
from dateutil.relativedelta import relativedelta

delta = relativedelta(dmax, dmin)

num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days