Skip to content

BUG: Timestamp.round floating point error #39244

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`)
- Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)

Timedelta
Expand Down
57 changes: 48 additions & 9 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ shadows the python class, where we do any heavy lifting.
"""
import warnings

cimport cython

import numpy as np

cimport numpy as cnp
Expand Down Expand Up @@ -153,32 +155,69 @@ class RoundTo:
return 4


cdef inline _floor_int64(values, unit):
return values - np.remainder(values, unit)
cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
    # Round every element of `values` down to a multiple of `unit`.
    # NPY_NAT entries are copied through unchanged.  The result is a newly
    # allocated int64 array with the same length as `values`.
    cdef:
        Py_ssize_t idx, count = len(values)
        ndarray[int64_t] floored = np.empty(count, dtype="i8")
        int64_t current, rounded

    # Checked arithmetic: C integer overflow raises instead of wrapping.
    with cython.overflowcheck(True):
        for idx in range(count):
            current = values[idx]
            if current != NPY_NAT:
                rounded = current - current % unit
            else:
                rounded = NPY_NAT
            floored[idx] = rounded

    return floored


cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
# Move each element of `values` up to the next multiple of `unit` (elements
# already on a multiple are kept as-is); NPY_NAT entries pass through
# unchanged.  Returns a newly allocated int64 array of the same length.
cdef:
Py_ssize_t i, n = len(values)
ndarray[int64_t] result = np.empty(n, dtype="i8")
int64_t res, value

# NOTE(review): the next two lines look like the pre-change definition carried
# over from the diff view rather than part of this function -- confirm.
cdef inline _ceil_int64(values, unit):
return values + np.remainder(-values, unit)
# Checked arithmetic: C integer overflow raises instead of wrapping.
with cython.overflowcheck(True):
for i in range(n):
value = values[i]

# NOTE(review): the next line looks like a removed header from the diff view,
# not part of this loop body -- confirm.
cdef inline _rounddown_int64(values, unit):
if value == NPY_NAT:
res = NPY_NAT
else:
# NOTE(review): `remainder` is not declared in the cdef block above, so it is
# presumably an untyped (Python object) local.  The accompanying overflow test
# expects a different OverflowError message for ceil than for floor, which may
# depend on this -- confirm before giving it a C type.
remainder = value % unit
if remainder == 0:
res = value
else:
# distance from `value` up to the next multiple of `unit`
res = value + (unit - remainder)

result[i] = res

return result


cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit):
    # Round each value to the nearest multiple of `unit`, sending exact ties
    # to the lower multiple, i.e. ceil(value - unit // 2).
    #
    # The half-unit shift is applied element-wise under overflowcheck.  The
    # vectorised ``values - unit // 2`` wraps silently near the int64 lower
    # bound (and turns NPY_NAT into an ordinary value) before any checked
    # arithmetic sees it.  Here NPY_NAT passes through unchanged and a shift
    # that leaves the int64 range raises OverflowError.
    cdef:
        Py_ssize_t i, n = len(values)
        ndarray[int64_t] result = np.empty(n, dtype="i8")
        int64_t value, remainder, half

    half = unit // 2

    with cython.overflowcheck(True):
        for i in range(n):
            value = values[i]

            if value == NPY_NAT:
                result[i] = NPY_NAT
            else:
                # Shift down by half a unit, then round up to a multiple.
                value = value - half
                remainder = value % unit
                if remainder == 0:
                    result[i] = value
                else:
                    result[i] = value + (unit - remainder)

    return result

cdef inline _roundup_int64(values, unit):

cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
    # Round each value to the nearest multiple of `unit`, sending exact ties
    # to the higher multiple, i.e. floor(value + unit // 2).
    #
    # The half-unit shift is applied element-wise under overflowcheck.  The
    # vectorised ``values + unit // 2`` wraps silently near the int64 upper
    # bound (and turns NPY_NAT into an ordinary value) before any checked
    # arithmetic sees it.  Here NPY_NAT passes through unchanged and a shift
    # that leaves the int64 range raises OverflowError.
    cdef:
        Py_ssize_t i, n = len(values)
        ndarray[int64_t] result = np.empty(n, dtype="i8")
        int64_t value, half

    half = unit // 2

    with cython.overflowcheck(True):
        for i in range(n):
            value = values[i]

            if value == NPY_NAT:
                result[i] = NPY_NAT
            else:
                # Shift up by half a unit, then round down to a multiple.
                value = value + half
                result[i] = value - value % unit

    return result


def round_nsint64(values, mode, freq):
def round_nsint64(values: np.ndarray, mode: RoundTo, freq) -> np.ndarray:
"""
Applies rounding mode at given frequency

Parameters
----------
values : :obj:`ndarray`
values : np.ndarray[int64_t]
mode : instance of `RoundTo` enumeration
freq : str, obj

Returns
-------
:obj:`ndarray`
np.ndarray[int64_t]
"""

unit = to_offset(freq).nanos
Expand Down
79 changes: 78 additions & 1 deletion pandas/tests/scalar/timestamp/test_unary_ops.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from datetime import datetime

from dateutil.tz import gettz
import numpy as np
import pytest
import pytz
from pytz import utc

from pandas._libs.tslibs import NaT, Timestamp, conversion, to_offset
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, conversion, to_offset
from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -247,6 +248,82 @@ def test_round_int64(self, timestamp, freq):
# round half to even
assert result.value // unit % 2 == 0, "round half to even error"

def test_round_implementation_bounds(self):
    """Check ceil/floor to seconds at ``Timestamp.min`` and ``Timestamp.max``."""
    # See also: analogous test for Timedelta
    lowest, highest = Timestamp.min, Timestamp.max

    # Rounding toward the interior of the representable range succeeds.
    assert lowest.ceil("s") == Timestamp(1677, 9, 21, 0, 12, 44)
    assert highest.floor("s") == highest - Timedelta(854775807)

    # Rounding away from the interior has to overflow.
    with pytest.raises(OverflowError, match="value too large"):
        lowest.floor("s")

    # the second message here shows up in windows builds
    overflow_messages = (
        "Python int too large to convert to C long",
        "int too big to convert",
    )
    with pytest.raises(OverflowError, match="|".join(overflow_messages)):
        highest.ceil("s")

@pytest.mark.parametrize("n", range(100))
@pytest.mark.parametrize(
    "method", [Timestamp.round, Timestamp.floor, Timestamp.ceil]
)
def test_round_sanity(self, method, n):
    """
    Check basic invariants of round/floor/ceil on a random Timestamp.

    ``n`` is not used in the body; parametrizing over it repeats the test
    with a fresh random value each time.
    """
    iinfo = np.iinfo(np.int64)
    val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64)
    ts = Timestamp(val)

    def checker(res, ts, nanos):
        # Direction-specific invariant for the method under test.
        if method is Timestamp.round:
            diff = np.abs((res - ts).value)
            assert diff <= nanos / 2
        elif method is Timestamp.floor:
            assert res <= ts
        elif method is Timestamp.ceil:
            assert res >= ts

    # Rounding to nanoseconds leaves the value unchanged.
    assert method(ts, "ns") == ts

    # Frequency alias -> length of that unit in nanoseconds, finest first.
    nanos_per_unit = {
        "us": 1000,
        "ms": 1_000_000,
        "s": 1_000_000_000,
        "min": 60 * 1_000_000_000,
        "h": 60 * 60 * 1_000_000_000,
        "D": 24 * 60 * 60 * 1_000_000_000,
    }
    for unit, nanos in nanos_per_unit.items():
        res = method(ts, unit)
        # The result moved by less than one unit and landed on a multiple.
        assert np.abs((res - ts).value) < nanos
        assert res.value % nanos == 0
        checker(res, ts, nanos)

# --------------------------------------------------------------
# Timestamp.replace

Expand Down