
ENH: date_range support reso keyword #49106


Merged · 15 commits · Nov 18, 2022
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
@@ -61,8 +61,10 @@ Other enhancements
- :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
- Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
- :func:`DataFrame.astype` exception messages now include the column name when type conversion is not possible (:issue:`47571`)
- :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`)
Member:
Also another reminder that in a future PR it would be good to collect all these non-nano updates and introduce them in a separate section.

Member (Author):
Planning to do this once the astype and constructor stuff is all in.

- :meth:`DataFrame.to_json` now supports a ``mode`` keyword accepting 'w' and 'a', defaulting to 'w'; 'a' can be used with ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file (:issue:`35849`)
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_200.notable_bug_fixes:
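As a quick, hedged illustration of the ``unit`` keyword announced above (the dtype and freq assertions mirror test_date_range_non_nano later in this diff):

import pandas as pd

# Ask date_range for a second-resolution index instead of the default nanoseconds.
dti = pd.date_range("2016-01-01", periods=3, freq="D", unit="s")
assert dti.dtype == "M8[s]"   # datetime64[s]
assert dti.freq == "D"
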
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pxd
@@ -4,7 +4,7 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT


cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pyi
@@ -10,6 +10,7 @@ def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...
def npy_unit_to_abbrev(reso: int) -> str: ...
def get_supported_reso(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str) -> int: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pyx
@@ -336,7 +336,7 @@ cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
raise NotImplementedError(unit)


cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev):
cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev):
if abbrev == "Y":
return NPY_DATETIMEUNIT.NPY_FR_Y
elif abbrev == "M":
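Making ``abbrev_to_npy_unit`` ``cpdef`` (and adding it to dtypes.pyi above) exposes it to Python-level callers; datetimes.py below imports it to compute ``creso``. A minimal sketch, assuming only what the stub declares (str in, int out):

from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit

# Map a resolution abbreviation to its integer NPY_DATETIMEUNIT code, as
# _generate_range does before calling tz_localize_to_utc(..., creso=creso).
creso = abbrev_to_npy_unit("s")
assert isinstance(creso, int)
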
19 changes: 17 additions & 2 deletions pandas/core/arrays/_ranges.py
@@ -22,6 +22,7 @@ def generate_regular_range(
end: Timestamp | Timedelta | None,
periods: int | None,
freq: BaseOffset,
unit: str = "ns",
) -> npt.NDArray[np.intp]:
"""
Generate a range of dates or timestamps with the spans between dates
@@ -37,14 +38,28 @@
Number of periods in produced date range.
freq : Tick
Describes space between dates in produced date range.
unit : str, default "ns"
The resolution the output is meant to represent.

Returns
-------
ndarray[np.int64] Representing nanoseconds.
ndarray[np.int64]
Representing the given resolution.
"""
istart = start.value if start is not None else None
iend = end.value if end is not None else None
stride = freq.nanos
freq.nanos # raises if non-fixed frequency
td = Timedelta(freq)
try:
td = td.as_unit( # pyright: ignore[reportGeneralTypeIssues]
unit, round_ok=False
)
except ValueError as err:
raise ValueError(
f"freq={freq} is incompatible with unit={unit}. "
"Use a lower freq or a higher unit instead."
) from err
stride = int(td.value)

if periods is None and istart is not None and iend is not None:
b = istart
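A short sketch of the new stride logic above: the Tick frequency becomes a ``Timedelta`` that is converted to the requested unit with ``round_ok=False``, so a frequency finer than the unit raises instead of silently rounding (the calls mirror the diff; the wrapped error message comes from generate_regular_range):

from pandas import Timedelta
from pandas.tseries.frequencies import to_offset

td = Timedelta(to_offset("ms"))            # 1 millisecond, as a Timedelta
td_ms = td.as_unit("ms", round_ok=False)   # lossless: freq matches the unit

# A freq finer than the unit cannot be represented losslessly, so it raises;
# generate_regular_range re-raises this as "freq=... is incompatible with unit=...".
try:
    Timedelta(to_offset("ns")).as_unit("ms", round_ok=False)
except ValueError:
    pass
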
28 changes: 24 additions & 4 deletions pandas/core/arrays/datetimes.py
@@ -42,6 +42,7 @@
tz_convert_from_utc,
tzconversion,
)
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
from pandas._typing import (
DateTimeErrorChoices,
IntervalClosedType,
@@ -380,6 +381,8 @@ def _generate_range( # type: ignore[override]
ambiguous: TimeAmbiguous = "raise",
nonexistent: TimeNonexistent = "raise",
inclusive: IntervalClosedType = "both",
*,
unit: str | None = None,
) -> DatetimeArray:

periods = dtl.validate_periods(periods)
@@ -402,6 +405,17 @@
if start is NaT or end is NaT:
raise ValueError("Neither `start` nor `end` can be NaT")

if unit is not None:
if unit not in ["s", "ms", "us", "ns"]:
raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
else:
unit = "ns"

if start is not None and unit is not None:
start = start.as_unit(unit, round_ok=False)
if end is not None and unit is not None:
end = end.as_unit(unit, round_ok=False)

left_inclusive, right_inclusive = validate_inclusive(inclusive)
start, end = _maybe_normalize_endpoints(start, end, normalize)
tz = _infer_tz_from_endpoints(start, end, tz)
@@ -416,6 +430,7 @@
end = _maybe_localize_point(
end, end_tz, end, freq, tz, ambiguous, nonexistent
)

if freq is not None:
# We break Day arithmetic (fixed 24 hour) here and opt for
# Day to mean calendar day (23/24/25 hour). Therefore, strip
@@ -427,7 +442,7 @@
end = end.tz_localize(None)

if isinstance(freq, Tick):
i8values = generate_regular_range(start, end, periods, freq)
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
else:
xdr = _generate_range(
start=start, end=end, periods=periods, offset=freq
@@ -441,8 +456,13 @@
if not timezones.is_utc(tz):
# short-circuit tz_localize_to_utc which would make
# an unnecessary copy with UTC but be a no-op.
creso = abbrev_to_npy_unit(unit)
i8values = tzconversion.tz_localize_to_utc(
i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
i8values,
tz,
ambiguous=ambiguous,
nonexistent=nonexistent,
creso=creso,
)

# i8values is localized datetime64 array -> have to convert
@@ -477,8 +497,8 @@ def _generate_range( # type: ignore[override]
if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
i8values = i8values[:-1]

dt64_values = i8values.view("datetime64[ns]")
dtype = tz_to_dtype(tz)
dt64_values = i8values.view(f"datetime64[{unit}]")
dtype = tz_to_dtype(tz, unit=unit)
return cls._simple_new(dt64_values, freq=freq, dtype=dtype)

# -----------------------------------------------------------------
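The endpoint handling above casts ``start``/``end`` to the requested unit with ``round_ok=False``; a hedged sketch of the consequence, using the same timestamps as test_date_range_freq_lower_than_endpoints below:

import pandas as pd

start = pd.Timestamp("2022-10-19 11:50:44.719781")   # microsecond precision
end = pd.Timestamp("2022-10-19 11:50:47.066458")

# Casting these endpoints to "s" would drop the fractional seconds, so this
# raises ValueError("Cannot losslessly convert units"):
try:
    pd.date_range(start, end, periods=3, unit="s")
except ValueError:
    pass

# "us" is lossless, so this succeeds with a microsecond-resolution index.
dti = pd.date_range(start, end, periods=2, unit="us")
assert dti.dtype == "M8[us]"
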
15 changes: 15 additions & 0 deletions pandas/core/indexes/datetimes.py
@@ -818,6 +818,8 @@ def date_range(
normalize: bool = False,
name: Hashable = None,
inclusive: IntervalClosedType = "both",
*,
unit: str | None = None,
**kwargs,
) -> DatetimeIndex:
"""
@@ -856,6 +858,10 @@
Include boundaries; Whether to set each bound as closed or open.

.. versionadded:: 1.4.0
unit : str, default None
Specify the desired resolution of the result.

.. versionadded:: 2.0.0

Member:
Could you add an example in the Examples section below?

Member (Author):
updated+green

**kwargs
For compatibility. Has no effect on the result.

@@ -966,6 +972,14 @@ def date_range(
>>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right')
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
dtype='datetime64[ns]', freq='D')

**Specify a unit**

>>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s")
DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
'2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
'2817-01-01', '2917-01-01'],
dtype='datetime64[s]', freq='100AS-JAN')
"""
if freq is None and com.any_none(periods, start, end):
freq = "D"
@@ -978,6 +992,7 @@
tz=tz,
normalize=normalize,
inclusive=inclusive,
unit=unit,
**kwargs,
)
return DatetimeIndex._simple_new(dtarr, name=name)
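A brief follow-up to the docstring example above: omitting ``unit`` keeps the long-standing nanosecond default, while a coarser unit lets the index reach dates outside the nanosecond range (a sketch based on the default ``unit=None`` handling and the docstring output):

import pandas as pd

# Default behavior is unchanged: datetime64[ns].
assert pd.date_range("2017-01-01", periods=2).dtype == "M8[ns]"

# With unit="s", the 100-year steps from the docstring example run well past
# 2262 (the datetime64[ns] upper bound) without overflowing.
dti = pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s")
assert dti.dtype == "M8[s]"
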
66 changes: 66 additions & 0 deletions pandas/tests/indexes/datetimes/test_date_range.py
@@ -1184,3 +1184,69 @@ def test_date_range_with_custom_holidays():
freq=freq,
)
tm.assert_index_equal(result, expected)


class TestDateRangeNonNano:
def test_date_range_reso_validation(self):
msg = "'unit' must be one of 's', 'ms', 'us', 'ns'"
with pytest.raises(ValueError, match=msg):
date_range("2016-01-01", "2016-03-04", periods=3, unit="h")

def test_date_range_freq_higher_than_reso(self):
# freq being higher-resolution than reso is a problem
msg = "Use a lower freq or a higher unit instead"
with pytest.raises(ValueError, match=msg):
# TODO: give a more useful or informative message?
date_range("2016-01-01", "2016-01-02", freq="ns", unit="ms")

def test_date_range_freq_matches_reso(self):
# GH#49106 matching reso is OK
dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="ms", unit="ms")
rng = np.arange(1_451_606_400_000, 1_451_606_401_001, dtype=np.int64)
expected = DatetimeIndex(rng.view("M8[ms]"), freq="ms")
tm.assert_index_equal(dti, expected)

dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="us", unit="us")
rng = np.arange(1_451_606_400_000_000, 1_451_606_401_000_001, dtype=np.int64)
expected = DatetimeIndex(rng.view("M8[us]"), freq="us")
tm.assert_index_equal(dti, expected)

dti = date_range("2016-01-01", "2016-01-01 00:00:00.001", freq="ns", unit="ns")
rng = np.arange(
1_451_606_400_000_000_000, 1_451_606_400_001_000_001, dtype=np.int64
)
expected = DatetimeIndex(rng.view("M8[ns]"), freq="ns")
tm.assert_index_equal(dti, expected)

def test_date_range_freq_lower_than_endpoints(self):
start = Timestamp("2022-10-19 11:50:44.719781")
end = Timestamp("2022-10-19 11:50:47.066458")

# start and end cannot be cast to "s" unit without lossy rounding,
# so we do not allow this in date_range
with pytest.raises(ValueError, match="Cannot losslessly convert units"):
date_range(start, end, periods=3, unit="s")

# but we can losslessly cast to "us"
dti = date_range(start, end, periods=2, unit="us")
rng = np.array(
[start.as_unit("us").value, end.as_unit("us").value], dtype=np.int64
)
expected = DatetimeIndex(rng.view("M8[us]"))
tm.assert_index_equal(dti, expected)

def test_date_range_non_nano(self):
start = np.datetime64("1066-10-14") # Battle of Hastings
end = np.datetime64("2305-07-13") # Jean-Luc Picard's birthday

dti = date_range(start, end, freq="D", unit="s")
assert dti.freq == "D"
assert dti.dtype == "M8[s]"

exp = np.arange(
start.astype("M8[s]").view("i8"),
(end + 1).astype("M8[s]").view("i8"),
24 * 3600,
).view("M8[s]")

tm.assert_numpy_array_equal(dti.to_numpy(), exp)