Skip to content

Commit 4ad979a

Browse files
committed
BUG: fix a timezone bug between origin and index on df.resample
1 parent 3fc2bf6 commit 4ad979a

File tree

4 files changed

+32
-28
lines changed

4 files changed

+32
-28
lines changed

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7821,8 +7821,8 @@ def resample(
78217821
For a MultiIndex, level (name or number) to use for
78227822
resampling. `level` must be datetime-like.
78237823
origin : {'epoch', 'start', 'start_day'}, Timestamp or str, default 'start_day'
7824-
The timestamp on which to adjust the grouping. It must be timezone aware if
7825-
the index of the resampled data is.
7824+
The timestamp on which to adjust the grouping. The timezone of origin
7825+
must match the timezone of the index.
78267826
If a timestamp is not used, these values are also supported:
78277827
78287828
- 'epoch': `origin` is 1970-01-01

pandas/core/groupby/grouper.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ class Grouper:
8484
See: :class:`DataFrame.resample`
8585
8686
origin : {'epoch', 'start', 'start_day'}, Timestamp or str, default 'start_day'
87-
The timestamp on which to adjust the grouping. It must be timezone aware if
88-
the index of the resampled data is.
87+
The timestamp on which to adjust the grouping. The timezone of origin must
88+
match the timezone of the index.
8989
If a timestamp is not used, these values are also supported:
9090
9191
- 'epoch': `origin` is 1970-01-01

pandas/core/resample.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,10 +1666,11 @@ def _get_timestamp_range_edges(
16661666
The dateoffset to which the Timestamps will be adjusted.
16671667
closed : {'right', 'left'}, default None
16681668
Which side of bin interval is closed.
1669-
origin : {'epoch', 'start', 'start_day'}, Timestamp or str, default 'start_day'
1670-
The timestamp on which to adjust the grouping. It must be timezone aware if
1671-
the index of the resampled data is.
1669+
origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
1670+
The timestamp on which to adjust the grouping. The timezone of origin must
1671+
match the timezone of the index.
16721672
If a timestamp is not used, these values are also supported:
1673+
16731674
- 'epoch': `origin` is 1970-01-01
16741675
- 'start': `origin` is the first value of the timeseries
16751676
- 'start_day': `origin` is the first day at midnight of the timeseries
@@ -1680,38 +1681,34 @@ def _get_timestamp_range_edges(
16801681
-------
16811682
A tuple of length 2, containing the adjusted pd.Timestamp objects.
16821683
"""
1683-
if isinstance(freq, Tick):
1684-
if origin not in {"epoch", "start", "start_day"}:
1685-
is_idx_tz_aware = first.tz is not None or last.tz is not None
1686-
if origin.tz is None and is_idx_tz_aware:
1687-
raise ValueError("The origin must have the same timezone as the index.")
1684+
index_tz = first.tz
1685+
if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None):
1686+
raise ValueError("The origin must have the same timezone as the index.")
16881687

1688+
if isinstance(freq, Tick):
16891689
if isinstance(freq, Day):
16901690
# _adjust_dates_anchored assumes 'D' means 24H, but first/last
16911691
# might contain a DST transition (23H, 24H, or 25H).
16921692
# So "pretend" the dates are naive when adjusting the endpoints
1693-
tz = first.tz
16941693
first = first.tz_localize(None)
16951694
last = last.tz_localize(None)
16961695

16971696
first, last = _adjust_dates_anchored(
16981697
first, last, freq, closed=closed, origin=origin, offset=offset,
16991698
)
17001699
if isinstance(freq, Day):
1701-
first = first.tz_localize(tz)
1702-
last = last.tz_localize(tz)
1703-
return first, last
1704-
1700+
first = first.tz_localize(index_tz)
1701+
last = last.tz_localize(index_tz)
17051702
else:
17061703
first = first.normalize()
17071704
last = last.normalize()
17081705

1709-
if closed == "left":
1710-
first = Timestamp(freq.rollback(first))
1711-
else:
1712-
first = Timestamp(first - freq)
1706+
if closed == "left":
1707+
first = Timestamp(freq.rollback(first))
1708+
else:
1709+
first = Timestamp(first - freq)
17131710

1714-
last = Timestamp(last + freq)
1711+
last = Timestamp(last + freq)
17151712

17161713
return first, last
17171714

@@ -1733,14 +1730,15 @@ def _get_period_range_edges(
17331730
The freq to which the Periods will be adjusted.
17341731
closed : {'right', 'left'}, default None
17351732
Which side of bin interval is closed.
1736-
origin : {'epoch', 'start', 'start_day'}, Timestamp or str, default 'start_day'
1737-
The timestamp on which to adjust the grouping. It must be timezone aware if
1738-
the index of the resampled data is.
1733+
origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
1734+
The timestamp on which to adjust the grouping. The timezone of origin must
1735+
match the timezone of the index.
17391736
17401737
If a timestamp is not used, these values are also supported:
1741-
- If 'epoch': `origin` is 1970-01-01
1742-
- If 'start': then `origin` is the first value of the timeseries
1743-
- If 'start_day', then `origin` is the first day at midnight of the timeseries
1738+
1739+
- 'epoch': `origin` is 1970-01-01
1740+
- 'start': `origin` is the first value of the timeseries
1741+
- 'start_day': `origin` is the first day at midnight of the timeseries
17441742
offset : pd.Timedelta, default is None
17451743
An offset timedelta added to the origin.
17461744

pandas/tests/resample/test_datetime_index.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,12 @@ def test_resample_origin_with_tz():
814814
with pytest.raises(ValueError, match=msg):
815815
ts.resample("5min", origin="12/31/1999 23:57:00").mean()
816816

817+
# if the series is not tz-aware, origin must not be tz-aware either
818+
rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s")
819+
ts = Series(np.random.randn(len(rng)), index=rng)
820+
with pytest.raises(ValueError, match=msg):
821+
ts.resample("5min", origin="12/31/1999 23:57:00+03:00").mean()
822+
817823

818824
def test_resample_daily_anchored():
819825
rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T")

0 commit comments

Comments
 (0)