Skip to content

[REF] Move constructor helpers to EA Mixin classes #21843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def _addsub_int_array(self, other, op):
td = Timedelta(self.freq)
return op(self, td * other)

# We should only get here with DatetimeIndex; dispatch
# We should only get here with Datetime Array/Index; dispatch
# to _addsub_offset_array
assert not is_timedelta64_dtype(self)
return op(self, np.array(other) * self.freq)
Expand Down
81 changes: 79 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
from datetime import datetime, timedelta
import warnings

import numpy as np
Expand All @@ -22,6 +22,8 @@
_ensure_int64)
from pandas.core.dtypes.dtypes import DatetimeTZDtype

from pandas.core.algorithms import checked_add_with_arr

from pandas.tseries.frequencies import to_offset, DateOffset
from pandas.tseries.offsets import Tick

Expand Down Expand Up @@ -250,8 +252,41 @@ def _assert_tzawareness_compat(self, other):
# -----------------------------------------------------------------
# Arithmetic Methods

def _sub_datelike(self, other):
    """
    Subtract a datetime-like from self, giving timedelta64[ns] values.

    Parameters
    ----------
    other : DatetimeArrayMixin, np.ndarray, datetime or np.datetime64
        An ndarray is wrapped in ``type(self)`` before subtracting
        (it is assumed to be datetime64-dtype).

    Returns
    -------
    The difference viewed as ``timedelta64[ns]``; for a scalar that
    converts to NaT, whatever ``self - NaT`` returns.

    Raises
    ------
    TypeError
        If the timezones of the operands are not compatible, or if
        ``other`` is not one of the supported types.
    """
    if isinstance(other, (DatetimeArrayMixin, np.ndarray)):
        # array-like operand: element-wise subtraction
        if isinstance(other, np.ndarray):
            # an ndarray is assumed to hold datetime64 values
            rhs = type(self)(other)
        else:
            rhs = other
        if not self._has_same_tz(rhs):
            # both sides must share a timezone (or both be naive)
            raise TypeError("{cls} subtraction must have the same "
                            "timezones or no timezones"
                            .format(cls=type(self).__name__))
        return self._sub_datelike_dti(rhs).view('timedelta64[ns]')

    if not isinstance(other, (datetime, np.datetime64)):
        raise TypeError("cannot subtract {cls} and {typ}"
                        .format(cls=type(self).__name__,
                                typ=type(other).__name__))

    # scalar operand
    assert other is not NaT
    stamp = Timestamp(other)
    if stamp is NaT:
        # e.g. np.datetime64('NaT') only becomes NaT after conversion
        return self - NaT
    if not self._has_same_tz(stamp):
        raise TypeError("Timestamp subtraction must have the same "
                        "timezones or no timezones")

    # overflow-checked i8 subtraction; masked positions are then
    # filled with iNaT
    diffs = checked_add_with_arr(self.asi8, -stamp.value,
                                 arr_mask=self._isnan)
    diffs = self._maybe_mask_results(diffs, fill_value=iNaT)
    return diffs.view('timedelta64[ns]')

def _sub_datelike_dti(self, other):
"""subtraction of two DatetimeIndexes"""
"""subtraction of two Datetime Arrays/Indexes"""
if not len(self) == len(other):
raise ValueError("cannot add indices of unequal length")

Expand Down Expand Up @@ -517,6 +552,48 @@ def to_pydatetime(self):
"""
return tslib.ints_to_pydatetime(self.asi8, tz=self.tz)

def normalize(self):
    """
    Convert times to midnight.

    The time component of each date-time is reset to midnight, i.e.
    00:00:00, which is useful when the time of day does not matter.
    Length is unaltered. The timezones are unaffected.

    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    Returns
    -------
    DatetimeArray, DatetimeIndex or Series
        The same type as the original data. Series will have the same
        name and index. DatetimeIndex will have the same name.

    See Also
    --------
    floor : Floor the datetimes to the specified freq.
    ceil : Ceil the datetimes to the specified freq.
    round : Round the datetimes to the specified freq.

    Examples
    --------
    >>> idx = pd.DatetimeIndex(start='2014-08-01 10:00', freq='H',
    ...                        periods=3, tz='Asia/Calcutta')
    >>> idx
    DatetimeIndex(['2014-08-01 10:00:00+05:30',
                   '2014-08-01 11:00:00+05:30',
                   '2014-08-01 12:00:00+05:30'],
                    dtype='datetime64[ns, Asia/Calcutta]', freq='H')
    >>> idx.normalize()
    DatetimeIndex(['2014-08-01 00:00:00+05:30',
                   '2014-08-01 00:00:00+05:30',
                   '2014-08-01 00:00:00+05:30'],
                   dtype='datetime64[ns, Asia/Calcutta]', freq=None)
    """
    midnight_i8 = conversion.normalize_i8_timestamps(self.asi8, self.tz)
    # rebuild without a tz (letting freq be inferred), then re-attach
    # the original timezone
    rebuilt = type(self)(midnight_i8, freq='infer')
    return rebuilt.tz_localize(self.tz)

# -----------------------------------------------------------------
# Properties - Vectorized Timestamp Properties/Methods

Expand Down
129 changes: 126 additions & 3 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,25 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.tslib import NaT, iNaT
from pandas._libs.tslibs.period import (
Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX,
get_period_field_arr, period_asfreq_arr)
from pandas._libs.tslibs import period as libperiod
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas._libs.tslibs import (
NaT, iNaT,
delta_to_nanoseconds,
period as libperiod)
from pandas._libs.tslibs.fields import isleapyear_arr

from pandas import compat
from pandas.compat import zip
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
is_integer_dtype, is_float_dtype, is_period_dtype)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries

import pandas.core.common as com

from pandas.tseries import frequencies
from pandas.tseries.offsets import Tick, DateOffset
Expand Down Expand Up @@ -157,6 +162,25 @@ def _from_ordinals(cls, values, freq=None):
result._freq = Period._maybe_convert_freq(freq)
return result

@classmethod
def _generate_range(cls, start, end, periods, freq, fields):
    """
    Build period ordinals either from endpoints or from field values.

    Parameters
    ----------
    start, end : endpoint values or None
    periods : number of periods or None
    freq : frequency or None; converted with
        ``Period._maybe_convert_freq`` when given
    fields : dict
        Keyword arguments forwarded to ``_range_from_fields``.

    Returns
    -------
    (ordinals, freq) as produced by the selected helper.

    Raises
    ------
    ValueError
        If both endpoints and fields are given, or neither is.
    """
    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    has_fields = len(fields) > 0
    has_endpoints = com._count_not_none(start, end) > 0

    if has_endpoints and has_fields:
        raise ValueError('Can either instantiate from fields '
                         'or endpoints, but not both')
    if not (has_endpoints or has_fields):
        raise ValueError('Not enough parameters to construct '
                         'Period range')

    if has_endpoints:
        ordinals, freq = _get_ordinal_range(start, end, periods, freq)
    else:
        ordinals, freq = _range_from_fields(freq=freq, **fields)
    return ordinals, freq

# --------------------------------------------------------------------
# Vectorized analogues of Period properties

Expand Down Expand Up @@ -371,3 +395,102 @@ def _add_comparison_methods(cls):


PeriodArrayMixin._add_comparison_methods()


# -----------------------------------------------------------------
# Constructor Helpers

def _get_ordinal_range(start, end, periods, freq, mult=1):
    """
    Compute int64 period ordinals from two of start / end / periods.

    Parameters
    ----------
    start, end : values accepted by ``Period(value, freq)``, or None
    periods : int or None
    freq : frequency or None; when None it is taken from ``start`` or
        ``end`` if either is a Period
    mult : int, default 1
        Step between ordinals; replaced by the multiple parsed from
        ``freq`` whenever ``freq`` is given.

    Returns
    -------
    (np.ndarray[int64], freq)

    Raises
    ------
    ValueError
        If not exactly two of start/end/periods are given, if start and
        end have different freqs, if either is NaT, or if no freq can
        be inferred.
    """
    if com._count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    if freq is not None:
        _, mult = frequencies.get_freq_code(freq)

    start = Period(start, freq) if start is not None else start
    end = Period(end, freq) if end is not None else end

    start_is_period = isinstance(start, Period)
    end_is_period = isinstance(end, Period)

    if start_is_period and end_is_period and start.freq != end.freq:
        raise ValueError('start and end must have same freq')
    if start is NaT or end is NaT:
        raise ValueError('start and end must not be NaT')

    if freq is None:
        if start_is_period:
            freq = start.freq
        elif end_is_period:
            freq = end.freq
        else:  # pragma: no cover
            raise ValueError('Could not infer freq from start/end')

    # Work out the half-open [first, stop) bounds for np.arange.
    if periods is None:
        first = start.ordinal
        stop = end.ordinal + 1
    else:
        span = periods * mult
        if start is None:
            # count backwards from the end point
            first = end.ordinal - span + mult
            stop = end.ordinal + 1
        else:
            first = start.ordinal
            stop = start.ordinal + span

    data = np.arange(first, stop, mult, dtype=np.int64)
    return data, freq


def _range_from_fields(year=None, month=None, quarter=None, day=None,
                       hour=None, minute=None, second=None, freq=None):
    """
    Compute int64 period ordinals from date/time field values.

    When ``quarter`` is given, ordinals are built from (year, quarter)
    pairs and ``freq`` defaults to 'Q'; otherwise they are built from
    (year, month, day, hour, minute, second). Missing hour/minute/second
    default to 0 and a missing day defaults to 1.

    Returns
    -------
    (np.ndarray[int64], freq)

    Raises
    ------
    AssertionError
        If ``quarter`` is given with a ``freq`` whose base code is not
        quarterly.
    """
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second
    day = 1 if day is None else day

    if quarter is not None:
        if freq is None:
            freq = 'Q'
            base = frequencies.FreqGroup.FR_QTR
        else:
            base, _ = frequencies.get_freq_code(freq)
            if base != frequencies.FreqGroup.FR_QTR:
                raise AssertionError("base must equal FR_QTR")

        years, quarters = _make_field_arrays(year, quarter)
        ordinals = []
        for yr, qtr in zip(years, quarters):
            # translate (year, quarter) into a (year, month) pair
            cal_year, cal_month = libperiod._quarter_to_myear(yr, qtr, freq)
            ordinals.append(
                libperiod.period_ordinal(cal_year, cal_month,
                                         1, 1, 1, 1, 0, 0, base))
    else:
        base, _ = frequencies.get_freq_code(freq)
        columns = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(yr, mth, d, h, mn, s, 0, 0, base)
            for yr, mth, d, h, mn, s in zip(*columns)
        ]

    return np.array(ordinals, dtype=np.int64), freq


def _make_field_arrays(*fields):
length = None
for x in fields:
if isinstance(x, (list, np.ndarray, ABCSeries)):
if length is not None and len(x) != length:
raise ValueError('Mismatched Period array lengths')
elif length is None:
length = len(x)

arrays = [np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries))
else np.repeat(x, length) for x in fields]

return arrays
52 changes: 51 additions & 1 deletion pandas/core/arrays/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np

from pandas._libs import tslibs
from pandas._libs.tslibs import Timedelta, NaT
from pandas._libs.tslibs import Timestamp, Timedelta, NaT, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import array_to_timedelta64

Expand All @@ -15,6 +15,8 @@
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core.algorithms import checked_add_with_arr

from pandas.tseries.offsets import Tick, DateOffset
from pandas.tseries.frequencies import to_offset

Expand Down Expand Up @@ -92,6 +94,24 @@ def _add_offset(self, other):
.format(typ=type(other).__name__,
cls=type(self).__name__))

def _add_datelike(self, other):
    """
    Add a datetime-like to this timedelta array.

    Parameters
    ----------
    other : DatetimeArrayMixin, np.ndarray or datetime-like scalar
        An ndarray is assumed to be datetime64-dtype and is wrapped in
        ``DatetimeArrayMixin`` first.

    Returns
    -------
    For array-like ``other``, the result of ``other + self``; for a
    scalar, a ``DatetimeArrayMixin`` built from the summed i8 values.
    """
    from .datetimes import DatetimeArrayMixin

    if isinstance(other, np.ndarray):
        # assume datetime64-dtype and let the datetime side do the work
        return DatetimeArrayMixin(other) + self
    if isinstance(other, DatetimeArrayMixin):
        # defer to the datetime array's own addition
        return other + self

    # scalar operand
    assert other is not NaT
    stamp = Timestamp(other)
    # overflow-checked i8 addition; masked positions are then filled
    # with iNaT
    summed = checked_add_with_arr(self.asi8, stamp.value,
                                  arr_mask=self._isnan)
    summed = self._maybe_mask_results(summed, fill_value=iNaT)
    return DatetimeArrayMixin(summed)

def _sub_datelike(self, other):
assert other is not NaT
raise TypeError("cannot subtract a datelike from a {cls}"
Expand Down Expand Up @@ -198,3 +218,33 @@ def to_pytimedelta(self):
nanoseconds = _field_accessor("nanoseconds", "nanoseconds",
"\nNumber of nanoseconds (>= 0 and less "
"than 1 microsecond) for each\nelement.\n")

@property
def components(self):
    """
    Return a dataframe of the components (days, hours, minutes,
    seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas.

    Returns
    -------
    a DataFrame
        One row per element. Missing elements become a row of NaN;
        when there are no missing elements the frame is cast to int64.
    """
    from pandas import DataFrame

    columns = ['days', 'hours', 'minutes', 'seconds',
               'milliseconds', 'microseconds', 'nanoseconds']
    hasnans = self.hasnans

    def _row(td):
        # the null check is only needed when the array has missing values
        if hasnans and isna(td):
            return [np.nan] * len(columns)
        return td.components

    result = DataFrame([_row(td) for td in self])
    result.columns = columns
    if not hasnans:
        # no NaN rows, so an integer dtype is safe
        result = result.astype('int64')
    return result
Loading