Skip to content

CLN: de-duplicate code in _matplotlib.converter #55155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,6 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil:
return NPY_DATETIMEUNIT.NPY_FR_D


# TODO: use in _matplotlib.converter?
cpdef int64_t periods_per_day(
NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns
) except? -1:
Expand Down
202 changes: 101 additions & 101 deletions pandas/plotting/_matplotlib/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from typing import (
TYPE_CHECKING,
Any,
Final,
cast,
)

Expand All @@ -30,8 +29,14 @@
Timestamp,
to_offset,
)
from pandas._libs.tslibs.dtypes import FreqGroup
from pandas._typing import F
from pandas._libs.tslibs.dtypes import (
FreqGroup,
periods_per_day,
)
from pandas._typing import (
F,
npt,
)

from pandas.core.dtypes.common import (
is_float,
Expand Down Expand Up @@ -60,15 +65,6 @@

from pandas._libs.tslibs.offsets import BaseOffset

# constants
HOURS_PER_DAY: Final = 24.0
MIN_PER_HOUR: Final = 60.0
SEC_PER_MIN: Final = 60.0

SEC_PER_HOUR: Final = SEC_PER_MIN * MIN_PER_HOUR
SEC_PER_DAY: Final = SEC_PER_HOUR * HOURS_PER_DAY

MUSEC_PER_DAY: Final = 10**6 * SEC_PER_DAY

_mpl_units = {} # Cache for units overwritten by us

Expand Down Expand Up @@ -495,7 +491,7 @@ def _get_default_annual_spacing(nyears) -> tuple[int, int]:
return (min_spacing, maj_spacing)


def period_break(dates: PeriodIndex, period: str) -> np.ndarray:
def _period_break(dates: PeriodIndex, period: str) -> npt.NDArray[np.intp]:
"""
Returns the indices where the given period changes.

Expand All @@ -506,12 +502,17 @@ def period_break(dates: PeriodIndex, period: str) -> np.ndarray:
period : str
Name of the period to monitor.
"""
mask = _period_break_mask(dates, period)
return np.nonzero(mask)[0]


def _period_break_mask(dates: PeriodIndex, period: str) -> npt.NDArray[np.bool_]:
current = getattr(dates, period)
previous = getattr(dates - 1 * dates.freq, period)
return np.nonzero(current - previous)[0]
return current != previous


def has_level_label(label_flags: np.ndarray, vmin: float) -> bool:
def has_level_label(label_flags: npt.NDArray[np.intp], vmin: float) -> bool:
"""
Returns true if the ``label_flags`` indicate there is at least one label
for this level.
Expand All @@ -527,54 +528,59 @@ def has_level_label(label_flags: np.ndarray, vmin: float) -> bool:
return True


def _daily_finder(vmin, vmax, freq: BaseOffset):
def _get_periods_per_ymd(freq: BaseOffset) -> tuple[int, int, int]:
# error: "BaseOffset" has no attribute "_period_dtype_code"
dtype_code = freq._period_dtype_code # type: ignore[attr-defined]
freq_group = FreqGroup.from_period_dtype_code(dtype_code)

periodsperday = -1
ppd = -1 # placeholder for above-day freqs

if dtype_code >= FreqGroup.FR_HR.value:
if freq_group == FreqGroup.FR_NS:
periodsperday = 24 * 60 * 60 * 1000000000
elif freq_group == FreqGroup.FR_US:
periodsperday = 24 * 60 * 60 * 1000000
elif freq_group == FreqGroup.FR_MS:
periodsperday = 24 * 60 * 60 * 1000
elif freq_group == FreqGroup.FR_SEC:
periodsperday = 24 * 60 * 60
elif freq_group == FreqGroup.FR_MIN:
periodsperday = 24 * 60
elif freq_group == FreqGroup.FR_HR:
periodsperday = 24
else: # pragma: no cover
raise ValueError(f"unexpected frequency: {dtype_code}")
periodsperyear = 365 * periodsperday
periodspermonth = 28 * periodsperday

# error: "BaseOffset" has no attribute "_creso"
ppd = periods_per_day(freq._creso) # type: ignore[attr-defined]
ppm = 28 * ppd
ppy = 365 * ppd
elif freq_group == FreqGroup.FR_BUS:
periodsperyear = 261
periodspermonth = 19
ppm = 19
ppy = 261
elif freq_group == FreqGroup.FR_DAY:
periodsperyear = 365
periodspermonth = 28
ppm = 28
ppy = 365
elif freq_group == FreqGroup.FR_WK:
periodsperyear = 52
periodspermonth = 3
else: # pragma: no cover
raise ValueError("unexpected frequency")
ppm = 3
ppy = 52
elif freq_group == FreqGroup.FR_MTH:
ppm = 1
ppy = 12
elif freq_group == FreqGroup.FR_QTR:
ppm = -1 # placerholder
ppy = 4
elif freq_group == FreqGroup.FR_ANN:
ppm = -1 # placeholder
ppy = 1
else:
raise NotImplementedError(f"Unsupported frequency: {dtype_code}")

return ppd, ppm, ppy


def _daily_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray:
# error: "BaseOffset" has no attribute "_period_dtype_code"
dtype_code = freq._period_dtype_code # type: ignore[attr-defined]

periodsperday, periodspermonth, periodsperyear = _get_periods_per_ymd(freq)

# save this for later usage
vmin_orig = vmin
(vmin, vmax) = (int(vmin), int(vmax))
span = vmax - vmin + 1

(vmin, vmax) = (
Period(ordinal=int(vmin), freq=freq),
Period(ordinal=int(vmax), freq=freq),
dates_ = period_range(
start=Period(ordinal=vmin, freq=freq),
end=Period(ordinal=vmax, freq=freq),
freq=freq,
)
assert isinstance(vmin, Period)
assert isinstance(vmax, Period)
span = vmax.ordinal - vmin.ordinal + 1
dates_ = period_range(start=vmin, end=vmax, freq=freq)

# Initialize the output
info = np.zeros(
span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")]
Expand All @@ -595,45 +601,38 @@ def first_label(label_flags):

# Case 1. Less than a month
if span <= periodspermonth:
day_start = period_break(dates_, "day")
month_start = period_break(dates_, "month")
day_start = _period_break(dates_, "day")
month_start = _period_break(dates_, "month")
year_start = _period_break(dates_, "year")

def _hour_finder(label_interval, force_year_start) -> None:
_hour = dates_.hour
_prev_hour = (dates_ - 1 * dates_.freq).hour
hour_start = (_hour - _prev_hour) != 0
def _hour_finder(label_interval: int, force_year_start: bool) -> None:
target = dates_.hour
mask = _period_break_mask(dates_, "hour")
info_maj[day_start] = True
info_min[hour_start & (_hour % label_interval == 0)] = True
year_start = period_break(dates_, "year")
info_fmt[hour_start & (_hour % label_interval == 0)] = "%H:%M"
info_min[mask & (target % label_interval == 0)] = True
info_fmt[mask & (target % label_interval == 0)] = "%H:%M"
info_fmt[day_start] = "%H:%M\n%d-%b"
info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"
if force_year_start and not has_level_label(year_start, vmin_orig):
info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y"

def _minute_finder(label_interval) -> None:
hour_start = period_break(dates_, "hour")
_minute = dates_.minute
_prev_minute = (dates_ - 1 * dates_.freq).minute
minute_start = (_minute - _prev_minute) != 0
def _minute_finder(label_interval: int) -> None:
target = dates_.minute
hour_start = _period_break(dates_, "hour")
mask = _period_break_mask(dates_, "minute")
info_maj[hour_start] = True
info_min[minute_start & (_minute % label_interval == 0)] = True
year_start = period_break(dates_, "year")
info_fmt = info["fmt"]
info_fmt[minute_start & (_minute % label_interval == 0)] = "%H:%M"
info_min[mask & (target % label_interval == 0)] = True
info_fmt[mask & (target % label_interval == 0)] = "%H:%M"
info_fmt[day_start] = "%H:%M\n%d-%b"
info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"

def _second_finder(label_interval) -> None:
minute_start = period_break(dates_, "minute")
_second = dates_.second
_prev_second = (dates_ - 1 * dates_.freq).second
second_start = (_second - _prev_second) != 0
info["maj"][minute_start] = True
info["min"][second_start & (_second % label_interval == 0)] = True
year_start = period_break(dates_, "year")
info_fmt = info["fmt"]
info_fmt[second_start & (_second % label_interval == 0)] = "%H:%M:%S"
def _second_finder(label_interval: int) -> None:
target = dates_.second
minute_start = _period_break(dates_, "minute")
mask = _period_break_mask(dates_, "second")
info_maj[minute_start] = True
info_min[mask & (target % label_interval == 0)] = True
info_fmt[mask & (target % label_interval == 0)] = "%H:%M:%S"
info_fmt[day_start] = "%H:%M:%S\n%d-%b"
info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y"

Expand Down Expand Up @@ -672,8 +671,6 @@ def _second_finder(label_interval) -> None:
else:
info_maj[month_start] = True
info_min[day_start] = True
year_start = period_break(dates_, "year")
info_fmt = info["fmt"]
info_fmt[day_start] = "%d"
info_fmt[month_start] = "%d\n%b"
info_fmt[year_start] = "%d\n%b\n%Y"
Expand All @@ -685,15 +682,15 @@ def _second_finder(label_interval) -> None:

# Case 2. Less than three months
elif span <= periodsperyear // 4:
month_start = period_break(dates_, "month")
month_start = _period_break(dates_, "month")
info_maj[month_start] = True
if dtype_code < FreqGroup.FR_HR.value:
info["min"] = True
else:
day_start = period_break(dates_, "day")
day_start = _period_break(dates_, "day")
info["min"][day_start] = True
week_start = period_break(dates_, "week")
year_start = period_break(dates_, "year")
week_start = _period_break(dates_, "week")
year_start = _period_break(dates_, "year")
info_fmt[week_start] = "%d"
info_fmt[month_start] = "\n\n%b"
info_fmt[year_start] = "\n\n%b\n%Y"
Expand All @@ -704,9 +701,9 @@ def _second_finder(label_interval) -> None:
info_fmt[first_label(month_start)] = "\n\n%b\n%Y"
# Case 3. Less than 14 months ...............
elif span <= 1.15 * periodsperyear:
year_start = period_break(dates_, "year")
month_start = period_break(dates_, "month")
week_start = period_break(dates_, "week")
year_start = _period_break(dates_, "year")
month_start = _period_break(dates_, "month")
week_start = _period_break(dates_, "week")
info_maj[month_start] = True
info_min[week_start] = True
info_min[year_start] = False
Expand All @@ -717,17 +714,17 @@ def _second_finder(label_interval) -> None:
info_fmt[first_label(month_start)] = "%b\n%Y"
# Case 4. Less than 2.5 years ...............
elif span <= 2.5 * periodsperyear:
year_start = period_break(dates_, "year")
quarter_start = period_break(dates_, "quarter")
month_start = period_break(dates_, "month")
year_start = _period_break(dates_, "year")
quarter_start = _period_break(dates_, "quarter")
month_start = _period_break(dates_, "month")
info_maj[quarter_start] = True
info_min[month_start] = True
info_fmt[quarter_start] = "%b"
info_fmt[year_start] = "%b\n%Y"
# Case 4. Less than 4 years .................
elif span <= 4 * periodsperyear:
year_start = period_break(dates_, "year")
month_start = period_break(dates_, "month")
year_start = _period_break(dates_, "year")
month_start = _period_break(dates_, "month")
info_maj[year_start] = True
info_min[month_start] = True
info_min[year_start] = False
Expand All @@ -738,15 +735,15 @@ def _second_finder(label_interval) -> None:
info_fmt[year_start] = "%b\n%Y"
# Case 5. Less than 11 years ................
elif span <= 11 * periodsperyear:
year_start = period_break(dates_, "year")
quarter_start = period_break(dates_, "quarter")
year_start = _period_break(dates_, "year")
quarter_start = _period_break(dates_, "quarter")
info_maj[year_start] = True
info_min[quarter_start] = True
info_min[year_start] = False
info_fmt[year_start] = "%Y"
# Case 6. More than 12 years ................
else:
year_start = period_break(dates_, "year")
year_start = _period_break(dates_, "year")
year_break = dates_[year_start].year
nyears = span / periodsperyear
(min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
Expand All @@ -759,8 +756,8 @@ def _second_finder(label_interval) -> None:
return info


def _monthly_finder(vmin, vmax, freq):
periodsperyear = 12
def _monthly_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray:
_, _, periodsperyear = _get_periods_per_ymd(freq)

vmin_orig = vmin
(vmin, vmax) = (int(vmin), int(vmax))
Expand Down Expand Up @@ -795,6 +792,7 @@ def _monthly_finder(vmin, vmax, freq):
quarter_start = (dates_ % 3 == 0).nonzero()
info_maj[year_start] = True
# TODO: Check the following : is it really info['fmt'] ?
# 2023-09-15 this is reached in test_finder_monthly
info["fmt"][quarter_start] = True
info["min"] = True

Expand Down Expand Up @@ -829,8 +827,8 @@ def _monthly_finder(vmin, vmax, freq):
return info


def _quarterly_finder(vmin, vmax, freq):
periodsperyear = 4
def _quarterly_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray:
_, _, periodsperyear = _get_periods_per_ymd(freq)
vmin_orig = vmin
(vmin, vmax) = (int(vmin), int(vmax))
span = vmax - vmin + 1
Expand Down Expand Up @@ -876,7 +874,8 @@ def _quarterly_finder(vmin, vmax, freq):
return info


def _annual_finder(vmin, vmax, freq):
def _annual_finder(vmin, vmax, freq: BaseOffset) -> np.ndarray:
# Note: small difference here vs other finders in adding 1 to vmax
(vmin, vmax) = (int(vmin), int(vmax + 1))
span = vmax - vmin + 1

Expand All @@ -889,8 +888,9 @@ def _annual_finder(vmin, vmax, freq):

(min_anndef, maj_anndef) = _get_default_annual_spacing(span)
major_idx = dates_ % maj_anndef == 0
minor_idx = dates_ % min_anndef == 0
info["maj"][major_idx] = True
info["min"][(dates_ % min_anndef == 0)] = True
info["min"][minor_idx] = True
info["fmt"][major_idx] = "%Y"

return info
Expand Down Expand Up @@ -1087,7 +1087,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
"""
Convert seconds to 'D days HH:MM:SS.F'
"""
s, ns = divmod(x, 10**9)
s, ns = divmod(x, 10**9) # TODO(non-nano): this looks like it assumes ns
m, s = divmod(s, 60)
h, m = divmod(m, 60)
d, h = divmod(h, 24)
Expand Down
Loading