Skip to content

Commit e6a6064

Browse files
author
MarcoGorelli
committed
fix resample non-nano out-of-nano-bounds
1 parent b9a4335 commit e6a6064

File tree

2 files changed

+61
-23
lines changed

2 files changed

+61
-23
lines changed

pandas/core/resample.py

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ def _get_time_bins(self, ax: DatetimeIndex):
17311731
ax.min(),
17321732
ax.max(),
17331733
self.freq,
1734+
unit=ax.unit,
17341735
closed=self.closed,
17351736
origin=self.origin,
17361737
offset=self.offset,
@@ -1750,7 +1751,8 @@ def _get_time_bins(self, ax: DatetimeIndex):
17501751
name=ax.name,
17511752
ambiguous=True,
17521753
nonexistent="shift_forward",
1753-
).as_unit(ax.unit)
1754+
unit=ax.unit,
1755+
)
17541756

17551757
ax_values = ax.asi8
17561758
binner, bin_edges = self._adjust_bin_edges(binner, ax_values)
@@ -1960,6 +1962,7 @@ def _get_timestamp_range_edges(
19601962
first: Timestamp,
19611963
last: Timestamp,
19621964
freq: BaseOffset,
1965+
unit: str,
19631966
closed: Literal["right", "left"] = "left",
19641967
origin: TimeGrouperOrigin = "start_day",
19651968
offset: Timedelta | None = None,
@@ -2015,7 +2018,7 @@ def _get_timestamp_range_edges(
20152018
origin = origin.tz_localize(None)
20162019

20172020
first, last = _adjust_dates_anchored(
2018-
first, last, freq, closed=closed, origin=origin, offset=offset
2021+
first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit
20192022
)
20202023
if isinstance(freq, Day):
20212024
first = first.tz_localize(index_tz)
@@ -2082,7 +2085,7 @@ def _get_period_range_edges(
20822085
adjust_last = freq.is_on_offset(last_ts)
20832086

20842087
first_ts, last_ts = _get_timestamp_range_edges(
2085-
first_ts, last_ts, freq, closed=closed, origin=origin, offset=offset
2088+
first_ts, last_ts, freq, unit="ns", closed=closed, origin=origin, offset=offset
20862089
)
20872090

20882091
first = (first_ts + int(adjust_first) * freq).to_period(freq)
@@ -2115,32 +2118,42 @@ def _adjust_dates_anchored(
21152118
closed: Literal["right", "left"] = "right",
21162119
origin: TimeGrouperOrigin = "start_day",
21172120
offset: Timedelta | None = None,
2121+
unit="ns",
21182122
) -> tuple[Timestamp, Timestamp]:
21192123
# First and last offsets should be calculated from the start day to fix an
21202124
# error caused by resampling across multiple days when a one day period is
21212125
# not a multiple of the frequency. See GH 8683
21222126
# To handle frequencies that are not a multiple or a divisor of a day, we allow
21232127
# a fixed origin timestamp to be defined. See GH 31809
2124-
first = first.as_unit("ns")
2125-
last = last.as_unit("ns")
2128+
first = first.as_unit(unit)
2129+
last = last.as_unit(unit)
21262130
if offset is not None:
2127-
offset = offset.as_unit("ns")
2128-
2129-
origin_nanos = 0 # origin == "epoch"
2131+
offset = offset.as_unit(unit)
2132+
2133+
# TODO is there anything which can be reused here?
2134+
freq_value = freq.nanos
2135+
if unit == "us":
2136+
freq_value = freq_value // 1_000
2137+
elif unit == "ms":
2138+
freq_value = freq_value // 1_000_000
2139+
elif unit == "s":
2140+
freq_value = freq_value // 1_000_000_000
2141+
2142+
origin_timestamp = 0 # origin == "epoch"
21302143
if origin == "start_day":
2131-
origin_nanos = first.normalize()._value
2144+
origin_timestamp = first.normalize()._value
21322145
elif origin == "start":
2133-
origin_nanos = first._value
2146+
origin_timestamp = first._value
21342147
elif isinstance(origin, Timestamp):
2135-
origin_nanos = origin.as_unit("ns")._value
2148+
origin_timestamp = origin.as_unit(unit)._value
21362149
elif origin in ["end", "end_day"]:
21372150
origin_last = last if origin == "end" else last.ceil("D")
2138-
sub_freq_times = (origin_last._value - first._value) // freq.nanos
2151+
sub_freq_times = (origin_last._value - first._value) // freq_value
21392152
if closed == "left":
21402153
sub_freq_times += 1
21412154
first = origin_last - sub_freq_times * freq
2142-
origin_nanos = first._value
2143-
origin_nanos += offset._value if offset else 0
2155+
origin_timestamp = first._value
2156+
origin_timestamp += offset._value if offset else 0
21442157

21452158
# GH 10117 & GH 19375. If first and last contain timezone information,
21462159
# Perform the calculation in UTC in order to avoid localizing on an
@@ -2152,19 +2165,19 @@ def _adjust_dates_anchored(
21522165
if last_tzinfo is not None:
21532166
last = last.tz_convert("UTC")
21542167

2155-
foffset = (first._value - origin_nanos) % freq.nanos
2156-
loffset = (last._value - origin_nanos) % freq.nanos
2168+
foffset = (first._value - origin_timestamp) % freq_value
2169+
loffset = (last._value - origin_timestamp) % freq_value
21572170

21582171
if closed == "right":
21592172
if foffset > 0:
21602173
# roll back
21612174
fresult_int = first._value - foffset
21622175
else:
2163-
fresult_int = first._value - freq.nanos
2176+
fresult_int = first._value - freq_value
21642177

21652178
if loffset > 0:
21662179
# roll forward
2167-
lresult_int = last._value + (freq.nanos - loffset)
2180+
lresult_int = last._value + (freq_value - loffset)
21682181
else:
21692182
# already the end of the road
21702183
lresult_int = last._value
@@ -2177,11 +2190,11 @@ def _adjust_dates_anchored(
21772190

21782191
if loffset > 0:
21792192
# roll forward
2180-
lresult_int = last._value + (freq.nanos - loffset)
2193+
lresult_int = last._value + (freq_value - loffset)
21812194
else:
2182-
lresult_int = last._value + freq.nanos
2183-
fresult = Timestamp(fresult_int)
2184-
lresult = Timestamp(lresult_int)
2195+
lresult_int = last._value + freq_value
2196+
fresult = Timestamp(fresult_int, unit=unit)
2197+
lresult = Timestamp(lresult_int, unit=unit)
21852198
if first_tzinfo is not None:
21862199
fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo)
21872200
if last_tzinfo is not None:

pandas/tests/resample/test_datetime_index.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1838,7 +1838,7 @@ def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last, unit)
18381838
exp_last = Timestamp(exp_last)
18391839

18401840
freq = pd.tseries.frequencies.to_offset(freq)
1841-
result = _get_timestamp_range_edges(first, last, freq)
1841+
result = _get_timestamp_range_edges(first, last, freq, unit="ns")
18421842
expected = (exp_first, exp_last)
18431843
assert result == expected
18441844

@@ -1949,3 +1949,28 @@ def test_resample_unsigned_int(any_unsigned_int_numpy_dtype, unit):
19491949
),
19501950
)
19511951
tm.assert_frame_equal(result, expected)
1952+
1953+
1954+
def test_long_rule_non_nano():
1955+
# https://github.com/pandas-dev/pandas/issues/51024
1956+
idx = date_range("0300-01-01", "2000-01-01", unit="s", freq="100Y")
1957+
ser = Series([1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5], index=idx)
1958+
result = ser.resample("200Y").mean()
1959+
expected_idx = DatetimeIndex(
1960+
np.array(
1961+
[
1962+
"0300-12-31",
1963+
"0500-12-31",
1964+
"0700-12-31",
1965+
"0900-12-31",
1966+
"1100-12-31",
1967+
"1300-12-31",
1968+
"1500-12-31",
1969+
"1700-12-31",
1970+
"1900-12-31",
1971+
]
1972+
).astype("datetime64[s]"),
1973+
freq="200A-DEC",
1974+
)
1975+
expected = Series([1.0, 3.0, 6.5, 4.0, 3.0, 6.5, 4.0, 3.0, 6.5], index=expected_idx)
1976+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)