pandas-dev · jbrockmendel · Jul 10, 2018
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -403,7 +403,7 @@ def _addsub_int_array(self, other, op):
             td = Timedelta(self.freq)
             return op(self, td * other)
 
-        # We should only get here with DatetimeIndex; dispatch
+        # We should only get here with Datetime Array/Index; dispatch
         # to _addsub_offset_array
         assert not is_timedelta64_dtype(self)
         return op(self, np.array(other) * self.freq)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from datetime import timedelta
+from datetime import datetime, timedelta
 import warnings
 
 import numpy as np
@@ -22,6 +22,8 @@
     _ensure_int64)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
+from pandas.core.algorithms import checked_add_with_arr
+
 from pandas.tseries.frequencies import to_offset, DateOffset
 from pandas.tseries.offsets import Tick
 
@@ -250,8 +252,41 @@ def _assert_tzawareness_compat(self, other):
     # -----------------------------------------------------------------
     # Arithmetic Methods
 
+    def _sub_datelike(self, other):
+        """
+        Subtract a datetime-like array or scalar from self.
+
+        Parameters
+        ----------
+        other : DatetimeArrayMixin, np.ndarray, datetime or np.datetime64
+            An ndarray is assumed (not checked here) to be datetime64-dtype
+            and is wrapped in ``type(self)`` before subtracting.
+
+        Returns
+        -------
+        ndarray
+            The result viewed as ``timedelta64[ns]``.  When a scalar
+            ``other`` converts to NaT, the result of ``self - NaT`` is
+            returned as-is instead.
+
+        Raises
+        ------
+        TypeError
+            If ``self`` and ``other`` fail the ``_has_same_tz`` check, or
+            if ``other`` is of an unsupported type.
+        """
+        if isinstance(other, (DatetimeArrayMixin, np.ndarray)):
+            # array-like case: element-wise subtraction is delegated to
+            # _sub_datelike_dti
+            if isinstance(other, np.ndarray):
+                # if other is an ndarray, we assume it is datetime64-dtype
+                other = type(self)(other)
+            # require tz compat
+            if not self._has_same_tz(other):
+                raise TypeError("{cls} subtraction must have the same "
+                                "timezones or no timezones"
+                                .format(cls=type(self).__name__))
+            result = self._sub_datelike_dti(other)
+        elif isinstance(other, (datetime, np.datetime64)):
+            # scalar case; NaT itself is expected to be handled by the caller
+            assert other is not NaT
+            other = Timestamp(other)
+            if other is NaT:
+                # the conversion above can presumably still yield NaT (e.g.
+                # from a not-a-time np.datetime64); defer to subtraction of NaT
+                return self - NaT
+            elif not self._has_same_tz(other):
+                # require tz compat
+                raise TypeError("Timestamp subtraction must have the same "
+                                "timezones or no timezones")
+            else:
+                # subtract on the i8 values by adding the negated scalar.
+                # NOTE(review): arr_mask=self._isnan presumably excludes the
+                # NaT positions from the overflow check -- confirm against
+                # checked_add_with_arr.
+                i8 = self.asi8
+                result = checked_add_with_arr(i8, -other.value,
+                                              arr_mask=self._isnan)
+                # put iNaT back at the masked positions
+                result = self._maybe_mask_results(result,
+                                                  fill_value=iNaT)
+        else:
+            raise TypeError("cannot subtract {cls} and {typ}"
+                            .format(cls=type(self).__name__,
+                                    typ=type(other).__name__))
+        return result.view('timedelta64[ns]')
+
     def _sub_datelike_dti(self, other):
-        """subtraction of two DatetimeIndexes"""
+        """subtraction of two Datetime Arrays/Indexes"""
         if not len(self) == len(other):
             raise ValueError("cannot add indices of unequal length")
 
@@ -517,6 +552,48 @@ def to_pydatetime(self):
         """
         return tslib.ints_to_pydatetime(self.asi8, tz=self.tz)
 
+    def normalize(self):
+        """
+        Convert times to midnight.
+
+        The time component of the date-time is converted to midnight i.e.
+        00:00:00. This is useful in cases when the time does not matter.
+        Length is unaltered. The timezones are unaffected.
+
+        This method is available on Series with datetime values under
+        the ``.dt`` accessor, and directly on DatetimeIndex.
+
+        Returns
+        -------
+        DatetimeArray, DatetimeIndex or Series
+            The same type as the original data. Series will have the same
+            name and index. DatetimeIndex will have the same name.
+
+        See Also
+        --------
+        floor : Floor the datetimes to the specified freq.
+        ceil : Ceil the datetimes to the specified freq.
+        round : Round the datetimes to the specified freq.
+
+        Examples
+        --------
+        >>> idx = pd.DatetimeIndex(start='2014-08-01 10:00', freq='H',
+        ...                        periods=3, tz='Asia/Calcutta')
+        >>> idx
+        DatetimeIndex(['2014-08-01 10:00:00+05:30',
+                       '2014-08-01 11:00:00+05:30',
+                       '2014-08-01 12:00:00+05:30'],
+                        dtype='datetime64[ns, Asia/Calcutta]', freq='H')
+        >>> idx.normalize()
+        DatetimeIndex(['2014-08-01 00:00:00+05:30',
+                       '2014-08-01 00:00:00+05:30',
+                       '2014-08-01 00:00:00+05:30'],
+                       dtype='datetime64[ns, Asia/Calcutta]', freq=None)
+        """
+        # Normalization is done on the i8 values, with self.tz passed along.
+        # NOTE(review): the result is re-wrapped with an inferred freq and
+        # then localized back to self.tz, so normalize_i8_timestamps
+        # presumably returns wall-time (tz-naive) values -- confirm.
+        new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz)
+        return type(self)(new_values,
+                          freq='infer').tz_localize(self.tz)
+
     # -----------------------------------------------------------------
     # Properties - Vectorized Timestamp Properties/Methods
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -5,20 +5,25 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._libs.tslib import NaT, iNaT
 from pandas._libs.tslibs.period import (
     Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX,
     get_period_field_arr, period_asfreq_arr)
-from pandas._libs.tslibs import period as libperiod
-from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
+from pandas._libs.tslibs import (
+    NaT, iNaT,
+    delta_to_nanoseconds,
+    period as libperiod)
 from pandas._libs.tslibs.fields import isleapyear_arr
 
 from pandas import compat
+from pandas.compat import zip
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.common import (
     is_integer_dtype, is_float_dtype, is_period_dtype)
 from pandas.core.dtypes.dtypes import PeriodDtype
+from pandas.core.dtypes.generic import ABCSeries
+
+import pandas.core.common as com
 
 from pandas.tseries import frequencies
 from pandas.tseries.offsets import Tick, DateOffset
@@ -157,6 +162,25 @@ def _from_ordinals(cls, values, freq=None):
         result._freq = Period._maybe_convert_freq(freq)
         return result
 
+    @classmethod
+    def _generate_range(cls, start, end, periods, freq, fields):
+        """
+        Build the ordinals and freq for a Period range.
+
+        The range is specified either by endpoints (``start``/``end``,
+        combined with ``periods``) or by date ``fields``, but not both.
+
+        Parameters
+        ----------
+        start, end, periods
+            Endpoint specification, forwarded to ``_get_ordinal_range``.
+        freq
+            If not None, converted with ``Period._maybe_convert_freq``.
+        fields : mapping
+            Keyword arguments forwarded to ``_range_from_fields``.
+
+        Returns
+        -------
+        subarr, freq
+            Whatever the selected helper returns.
+
+        Raises
+        ------
+        ValueError
+            If both endpoints and fields are given, or if neither is.
+        """
+        if freq is not None:
+            freq = Period._maybe_convert_freq(freq)
+
+        field_count = len(fields)
+        # endpoints take this branch as soon as start or end is given
+        # (com._count_not_none presumably counts the non-None arguments)
+        if com._count_not_none(start, end) > 0:
+            if field_count > 0:
+                raise ValueError('Can either instantiate from fields '
+                                 'or endpoints, but not both')
+            subarr, freq = _get_ordinal_range(start, end, periods, freq)
+        elif field_count > 0:
+            subarr, freq = _range_from_fields(freq=freq, **fields)
+        else:
+            # neither start/end nor fields; ``periods`` alone is not enough
+            raise ValueError('Not enough parameters to construct '
+                             'Period range')
+
+        return subarr, freq
+
     # --------------------------------------------------------------------
     # Vectorized analogues of Period properties
 
@@ -371,3 +395,102 @@ def _add_comparison_methods(cls):
 
 
 PeriodArrayMixin._add_comparison_methods()
+
+
+# -----------------------------------------------------------------
+# Constructor Helpers
+
+def _get_ordinal_range(start, end, periods, freq, mult=1):
+    """
+    Compute int64 ordinals for a Period range from two of start/end/periods.
+
+    Parameters
+    ----------
+    start, end
+        Range endpoints; each one that is not None is converted with
+        ``Period(value, freq)``.
+    periods : int or None
+        Number of periods to generate (multiplied by ``mult`` before use).
+    freq
+        Frequency; when None it is taken from ``start`` or ``end``.
+    mult : int, default 1
+        Step between consecutive ordinals.  Replaced by the multiple from
+        ``frequencies.get_freq_code(freq)`` whenever ``freq`` is not None.
+
+    Returns
+    -------
+    data : np.ndarray[int64]
+        The generated ordinals.
+    freq
+        The given or inferred frequency.
+
+    Raises
+    ------
+    ValueError
+        If not exactly two of start, end and periods are specified, if
+        start and end have different freqs, if either is NaT, or if freq
+        cannot be inferred.
+    """
+    if com._count_not_none(start, end, periods) != 2:
+        raise ValueError('Of the three parameters: start, end, and periods, '
+                         'exactly two must be specified')
+
+    if freq is not None:
+        # an explicit freq overrides the ``mult`` argument
+        _, mult = frequencies.get_freq_code(freq)
+
+    if start is not None:
+        start = Period(start, freq)
+    if end is not None:
+        end = Period(end, freq)
+
+    # NOTE(review): Period(...) can presumably return NaT instead of a
+    # Period, hence the isinstance checks and the explicit NaT test below.
+    is_start_per = isinstance(start, Period)
+    is_end_per = isinstance(end, Period)
+
+    if is_start_per and is_end_per and start.freq != end.freq:
+        raise ValueError('start and end must have same freq')
+    if (start is NaT or end is NaT):
+        raise ValueError('start and end must not be NaT')
+
+    if freq is None:
+        if is_start_per:
+            freq = start.freq
+        elif is_end_per:
+            freq = end.freq
+        else:  # pragma: no cover
+            raise ValueError('Could not infer freq from start/end')
+
+    if periods is not None:
+        # span covered by the range, in ordinal units
+        periods = periods * mult
+        if start is None:
+            # count back from end so that end.ordinal is the last element
+            data = np.arange(end.ordinal - periods + mult,
+                             end.ordinal + 1, mult,
+                             dtype=np.int64)
+        else:
+            data = np.arange(start.ordinal, start.ordinal + periods, mult,
+                             dtype=np.int64)
+    else:
+        # both endpoints given; the +1 makes end.ordinal inclusive
+        data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
+
+    return data, freq
+
+
+def _range_from_fields(year=None, month=None, quarter=None, day=None,
+                       hour=None, minute=None, second=None, freq=None):
+    """
+    Compute int64 Period ordinals from date/time field values.
+
+    Parameters
+    ----------
+    year, month, quarter, day, hour, minute, second
+        Scalars or array-likes, broadcast to a common length by
+        ``_make_field_arrays``.  ``hour``, ``minute`` and ``second``
+        default to 0 and ``day`` defaults to 1 when None.  When
+        ``quarter`` is given, only ``year`` and ``quarter`` are used.
+    freq
+        Frequency.  With ``quarter`` it defaults to ``'Q'`` and must
+        otherwise resolve to the quarterly frequency group.
+
+    Returns
+    -------
+    ordinals : np.ndarray[int64]
+    freq
+
+    Raises
+    ------
+    AssertionError
+        If ``quarter`` is given and the base code of ``freq`` is not
+        ``FR_QTR``.
+    """
+    if hour is None:
+        hour = 0
+    if minute is None:
+        minute = 0
+    if second is None:
+        second = 0
+    if day is None:
+        day = 1
+
+    ordinals = []
+
+    if quarter is not None:
+        if freq is None:
+            freq = 'Q'
+            base = frequencies.FreqGroup.FR_QTR
+        else:
+            # NOTE(review): ``mult`` is unpacked but not used afterwards
+            base, mult = frequencies.get_freq_code(freq)
+            if base != frequencies.FreqGroup.FR_QTR:
+                raise AssertionError("base must equal FR_QTR")
+
+        year, quarter = _make_field_arrays(year, quarter)
+        for y, q in zip(year, quarter):
+            # translate (year, quarter) into a (year, month) pair
+            y, m = libperiod._quarter_to_myear(y, q, freq)
+            # NOTE(review): the remaining fields are passed as the literals
+            # 1, 1, 1, 1, 0, 0 -- presumably irrelevant at quarterly
+            # resolution; confirm against period_ordinal.
+            val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base)
+            ordinals.append(val)
+    else:
+        # NOTE(review): ``mult`` is unpacked but not used afterwards
+        base, mult = frequencies.get_freq_code(freq)
+        arrays = _make_field_arrays(year, month, day, hour, minute, second)
+        for y, mth, d, h, mn, s in zip(*arrays):
+            ordinals.append(libperiod.period_ordinal(
+                y, mth, d, h, mn, s, 0, 0, base))
+
+    return np.array(ordinals, dtype=np.int64), freq
+
+
+def _make_field_arrays(*fields):
+    """
+    Broadcast scalar fields against the array-like fields.
+
+    Parameters
+    ----------
+    *fields
+        Each field is either array-like (list, ndarray or Series) or is
+        treated as a scalar.
+
+    Returns
+    -------
+    list of np.ndarray
+        One array per field: array-likes are passed through ``np.asarray``
+        and scalars are repeated to the common length.
+
+    Raises
+    ------
+    ValueError
+        If the array-like fields do not all have the same length.
+    """
+    # the common length is taken from the first array-like field; every
+    # later array-like must match it
+    length = None
+    for x in fields:
+        if isinstance(x, (list, np.ndarray, ABCSeries)):
+            if length is not None and len(x) != length:
+                raise ValueError('Mismatched Period array lengths')
+            elif length is None:
+                length = len(x)
+
+    # NOTE(review): if no field is array-like, ``length`` is still None when
+    # it reaches np.repeat -- confirm callers always pass at least one
+    # array-like field.
+    arrays = [np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries))
+              else np.repeat(x, length) for x in fields]
+
+    return arrays
diff --git a/pandas/core/arrays/timedelta.py b/pandas/core/arrays/timedelta.py
@@ -4,7 +4,7 @@
 import numpy as np
 
 from pandas._libs import tslibs
-from pandas._libs.tslibs import Timedelta, NaT
+from pandas._libs.tslibs import Timestamp, Timedelta, NaT, iNaT
 from pandas._libs.tslibs.fields import get_timedelta_field
 from pandas._libs.tslibs.timedeltas import array_to_timedelta64
 
@@ -15,6 +15,8 @@
 from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.missing import isna
 
+from pandas.core.algorithms import checked_add_with_arr
+
 from pandas.tseries.offsets import Tick, DateOffset
 from pandas.tseries.frequencies import to_offset
 
@@ -92,6 +94,24 @@ def _add_offset(self, other):
                         .format(typ=type(other).__name__,
                                 cls=type(self).__name__))
 
+    def _add_datelike(self, other):
+        """
+        Add a datetime-like array or scalar to self.
+
+        Parameters
+        ----------
+        other : DatetimeArrayMixin, np.ndarray or datetime-like scalar
+            An ndarray is assumed (not checked here) to be datetime64-dtype.
+            Anything else is converted with ``Timestamp(other)``.
+
+        Returns
+        -------
+        The result of ``other + self`` for array-like ``other``, otherwise
+        a ``DatetimeArrayMixin`` built from the summed i8 values.
+        """
+        # adding a timedelta array/index to a datetimelike
+        # imported locally, presumably to avoid a circular import
+        from .datetimes import DatetimeArrayMixin
+        if isinstance(other, (DatetimeArrayMixin, np.ndarray)):
+            # if other is an ndarray, we assume it is datetime64-dtype
+            # defer to implementation in DatetimeIndex
+            if isinstance(other, np.ndarray):
+                other = DatetimeArrayMixin(other)
+            return other + self
+        else:
+            # scalar case; NaT itself is expected to be handled by the caller
+            assert other is not NaT
+            other = Timestamp(other)
+            i8 = self.asi8
+            # NOTE(review): arr_mask=self._isnan presumably excludes the NaT
+            # positions from the overflow check -- confirm against
+            # checked_add_with_arr.
+            result = checked_add_with_arr(i8, other.value,
+                                          arr_mask=self._isnan)
+            # put iNaT back at the masked positions
+            result = self._maybe_mask_results(result, fill_value=iNaT)
+            return DatetimeArrayMixin(result)
+
     def _sub_datelike(self, other):
         assert other is not NaT
         raise TypeError("cannot subtract a datelike from a {cls}"
@@ -198,3 +218,33 @@ def to_pytimedelta(self):
     nanoseconds = _field_accessor("nanoseconds", "nanoseconds",
                                   "\nNumber of nanoseconds (>= 0 and less "
                                   "than 1 microsecond) for each\nelement.\n")
+
+    @property
+    def components(self):
+        """
+        Return a DataFrame of the components (days, hours, minutes,
+        seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas.
+
+        Returns
+        -------
+        DataFrame
+            One row per element and one column per component.  The frame
+            is cast to int64 when there are no missing values; a missing
+            element yields a row of NaN and the frame is left uncast.
+        """
+        # imported locally, presumably to avoid a circular import
+        from pandas import DataFrame
+
+        columns = ['days', 'hours', 'minutes', 'seconds',
+                   'milliseconds', 'microseconds', 'nanoseconds']
+        hasnans = self.hasnans
+        # choose the per-element extractor once, so the isna check is only
+        # paid for when missing values are present
+        if hasnans:
+            def f(x):
+                if isna(x):
+                    return [np.nan] * len(columns)
+                return x.components
+        else:
+            def f(x):
+                return x.components
+
+        result = DataFrame([f(x) for x in self])
+        result.columns = columns
+        if not hasnans:
+            # NaN is not representable as int64, so only cast without NaNs
+            result = result.astype('int64')
+        return result