Implement DatetimeArray._from_sequence #24074

Merged (9 commits, Dec 5, 2018)
120 changes: 89 additions & 31 deletions pandas/core/arrays/datetimes.py
@@ -14,9 +14,9 @@
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
- _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
- is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype,
- is_timedelta64_dtype)
_INT64_DTYPE, _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype,
is_dtype_equal, is_extension_type, is_float_dtype, is_int64_dtype,
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
@@ -206,45 +206,103 @@ def _simple_new(cls, values, freq=None, tz=None):
result._tz = timezones.tz_standardize(tz)
return result

- def __new__(cls, values, freq=None, tz=None, dtype=None):
def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False,
dayfirst=False, yearfirst=False, ambiguous='raise'):
return cls._from_sequence(
values, freq=freq, tz=tz, dtype=dtype, copy=copy,
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)

- if freq is None and hasattr(values, "freq"):
-     # i.e. DatetimeArray, DatetimeIndex
-     freq = values.freq
@classmethod
Contributor: is there a reason you don't want to add verify_integrity here (as maybe _verify_integrity=True)?

Member Author: We've deprecated the kwarg in the DatetimeIndex constructor to get rid of it. In cases where verify_integrity is not needed, a different constructor (e.g. _simple_new) should be used.
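A minimal sketch of the constructor split discussed in this thread (an illustration against the pandas 0.24 development internals, not part of the diff):

import numpy as np

from pandas.core.arrays.datetimes import DatetimeArrayMixin as DatetimeArray

# Validating path: parses strings, resolves tz, and verifies that the
# data actually conform to the requested freq.
arr = DatetimeArray._from_sequence(["2018-01-01", "2018-01-02"], freq="D")

# Trusted path: the caller guarantees M8[ns] values and a consistent freq,
# so no integrity verification is performed.
raw = np.array(["2018-01-01", "2018-01-02"], dtype="M8[ns]")
fast = DatetimeArray._simple_new(raw, freq=arr.freq)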

def _from_sequence(cls, data, dtype=None, copy=False,
tz=None, freq=None,
dayfirst=False, yearfirst=False, ambiguous='raise'):

verify_integrity = True

freq, freq_infer = dtl.maybe_infer_freq(freq)
if freq is None and hasattr(data, "freq"):
# i.e. DatetimeArray/Index
freq = data.freq
verify_integrity = False

# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

- if is_object_dtype(values):
-     # kludge; dispatch until the DatetimeArray constructor is complete
-     from pandas import DatetimeIndex
-     values = DatetimeIndex(values, freq=freq, tz=tz)

- if isinstance(values, ABCSeries):
-     # extract to ndarray or DatetimeIndex
-     values = values._values

- if isinstance(values, DatetimeArrayMixin):
-     # extract nanosecond unix timestamps
-     if tz is None:
-         tz = values.tz
-     values = values.asi8

- if values.dtype == 'i8':
-     values = values.view('M8[ns]')
if not hasattr(data, "dtype"):
# e.g. list, tuple
if np.ndim(data) == 0:
# i.e. generator
data = list(data)
data = np.asarray(data)
copy = False
elif isinstance(data, ABCSeries):
data = data._values

# By this point we are assured to have either a numpy array or Index
data, copy = maybe_convert_dtype(data, copy)

if is_object_dtype(data) or is_string_dtype(data):
# TODO: We do not have tests specific to string-dtypes,
# also complex or categorical or other extension
copy = False
if lib.infer_dtype(data) == 'integer':
data = data.astype(np.int64)
else:
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
data, inferred_tz = objects_to_datetime64ns(
data, dayfirst=dayfirst, yearfirst=yearfirst)
tz = maybe_infer_tz(tz, inferred_tz)

if is_datetime64tz_dtype(data):
tz = maybe_infer_tz(tz, data.tz)
subarr = data._data

elif is_datetime64_dtype(data):
# tz-naive DatetimeArray/Index or ndarray[datetime64]
data = getattr(data, "_data", data)
if data.dtype != _NS_DTYPE:
data = conversion.ensure_datetime64ns(data)

if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
ambiguous=ambiguous)
data = data.view(_NS_DTYPE)

assert data.dtype == _NS_DTYPE, data.dtype
subarr = data

- assert isinstance(values, np.ndarray), type(values)
- assert is_datetime64_dtype(values)  # not yet assured nanosecond
- values = conversion.ensure_datetime64ns(values, copy=False)
else:
# must be integer dtype otherwise
# assume this data are epoch timestamps
if data.dtype != _INT64_DTYPE:
data = data.astype(np.int64, copy=False)
subarr = data.view(_NS_DTYPE)
Contributor: might be able to pull subarr = data.view(_NS_DTYPE) out of the if/else and just do it always (or maybe just do it in _simple_new), but this is for later

Member Author: That's what I tried originally, but it breaks the recently-implemented tests.arrays.test_datetimelike.test_from_array_keeps_base (#23956)

Contributor: Hrm, sorry. FWIW, that was a hackish thing that I introduced to avoid segfaults. If the reduction code was doing the right thing, we may be able to remove that test / requirement. But I'm not sure what the right thing is.

Member Author: For the purposes of this PR it sounds like the options are to either remove that test and un-indent this line, or keep this line how it is. It sounds like the first option would cause problems in your DTA branch. I don't have a strong preference here.
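For context on the identity concern behind test_from_array_keeps_base (an illustration, not from the PR): ndarray.view always allocates a new wrapper object, even when the dtype is unchanged, so hoisting the view out of the integer branch would replace an input that is already M8[ns] instead of passing it through untouched.

import numpy as np

data = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
view = data.view("M8[ns]")
assert view is not data   # a view is always a new array object...
assert view.base is data  # ...sharing the original buffer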


if copy:
# TODO: should this be deepcopy?
subarr = subarr.copy()

assert isinstance(subarr, np.ndarray), type(subarr)
assert subarr.dtype == 'M8[ns]', subarr.dtype

result = cls._simple_new(subarr, freq=freq, tz=tz)
if dtype is not None:
# TODO: can we work this into validate_tz_from_dtype?
if not is_dtype_equal(result.dtype, dtype):
# dtype must be coerced to DatetimeTZDtype above
if result.tz is not None:
raise ValueError("cannot localize from non-UTC data")

if verify_integrity and len(result) > 0:
if freq is not None and not freq_infer:
cls._validate_frequency(result, freq, ambiguous=ambiguous)

- result = cls._simple_new(values, freq=freq, tz=tz)
if freq_infer:
result.freq = to_offset(result.inferred_freq)

- # NB: Among other things not yet ported from the DatetimeIndex
- # constructor, this does not call _deepcopy_if_needed
return result

@classmethod
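To make the branches above concrete, a few of the input shapes the new _from_sequence normalizes to M8[ns] (hypothetical usage of the internal API, not from the PR):

import numpy as np

from pandas.core.arrays.datetimes import DatetimeArrayMixin as DatetimeArray

DatetimeArray._from_sequence(["2018-12-05", "2018-12-06"])       # object strings
DatetimeArray._from_sequence(np.array([0, 10], dtype=np.int64))  # epoch nanoseconds
DatetimeArray._from_sequence(iter(["2018-12-05"]))               # generator/iterator
DatetimeArray._from_sequence(["2018-12-05"], tz="US/Eastern")    # wall times, localized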
Expand Down Expand Up @@ -1494,7 +1552,7 @@ def maybe_convert_dtype(data, copy):
elif is_timedelta64_dtype(data):
warnings.warn("Passing timedelta64-dtype data is deprecated, will "
"raise a TypeError in a future version",
- FutureWarning, stacklevel=3)
FutureWarning, stacklevel=4)
data = data.view(_NS_DTYPE)

elif is_period_dtype(data):
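The stacklevel bump presumably accounts for the extra _from_sequence frame that now sits between the DatetimeIndex constructor and this helper, keeping the FutureWarning pointed at user code. A sketch of the deprecated path it guards (an illustration, not from the PR):

import warnings

import numpy as np
import pandas as pd

td64 = np.array([0, 86400 * 10 ** 9], dtype="m8[ns]")
with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")
    pd.DatetimeIndex(td64)  # timedelta64 input is deprecated; viewed as M8[ns]
assert any(issubclass(x.category, FutureWarning) for x in w)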
111 changes: 21 additions & 90 deletions pandas/core/indexes/datetimes.py
@@ -9,24 +9,19 @@

from pandas._libs import (
Timestamp, index as libindex, join as libjoin, lib, tslib as libts)
- from pandas._libs.tslibs import (
-     ccalendar, conversion, fields, parsing, timezones)
from pandas._libs.tslibs import ccalendar, fields, parsing, timezones
import pandas.compat as compat
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes.common import (
- _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype,
- is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, is_float,
- is_integer, is_list_like, is_object_dtype, is_period_dtype, is_scalar,
- is_string_dtype, is_string_like, pandas_dtype)
_NS_DTYPE, ensure_int64, is_datetime64_ns_dtype, is_dtype_equal, is_float,
is_integer, is_list_like, is_period_dtype, is_scalar, is_string_like,
pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.datetimes import (
- DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype,
- maybe_infer_tz, objects_to_datetime64ns)
DatetimeArrayMixin as DatetimeArray, _to_m8)
from pandas.core.base import _shared_docs
import pandas.core.common as com
from pandas.core.indexes.base import Index, _index_shared_docs
@@ -49,12 +44,17 @@ def _new_DatetimeIndex(cls, d):
# so need to localize
tz = d.pop('tz', None)

- with warnings.catch_warnings():
-     # we ignore warnings from passing verify_integrity=False
-     # TODO: If we knew what was going in to **d, we might be able to
-     # go through _simple_new instead
-     warnings.simplefilter("ignore")
-     result = cls.__new__(cls, verify_integrity=False, **d)
if "data" in d and not isinstance(d["data"], DatetimeIndex):
Contributor: Could you explain the reasoning behind this change? I don't see why just DatetimeIndex would need to have the integrity verified.

Member Author: Without this we get a couple of failing pickle-based tests: tests.frame.test_block_internals.test_pickle and tests.series.test_timeseries.test_pickle

Contributor: Gotcha, I've been fighting those errors too. I suspect they were working before because DatetimeIndex accepts data=None for range-based constructors, which we don't want for the arrays.

Contributor: this is because you need to define __reduce__ on ExtensionArray. This should never be hit by a non-DTI.

Member Author: It isn't. The issue is that this function calls DatetimeIndex.__new__ with verify_integrity=False (since it is unpickling a previously-constructed DTI, integrity has presumably already been verified, so we can skip that somewhat-costly step), and the pickle-tested cases raise ValueError because when we try to verify their integrity they fail.

Member Author: This is fixed by #24096

Contributor: let's fix that one first then. this needs to be changed here.

Contributor: The changes over in #24096 seem to be... different? I don't know how to explain it, but doesn't the fact that we're having to copy data over in #24096 seem disconnected from pickling?

Member Author: Pickling turns out to be only tangentially related to the "real" problem. In the status quo, altering a datetime64tz column alters the DatetimeIndex that backs it, but doesn't set its freq to None. When that DataFrame is pickled and then unpickled, it tries to reconstruct that DatetimeIndex, but is passing arguments that should raise ValueError. ATM that gets side-stepped by passing verify_integrity=False.

So the goal of #24096 is to not corrupt the DatetimeIndex in the first place, making verify_integrity=False unnecessary.

That's still pretty roundabout. Any clearer?

Contributor: Yeah, that does help.

# Avoid need to verify integrity by calling simple_new directly
data = d.pop("data")
result = cls._simple_new(data, **d)
else:
with warnings.catch_warnings():
# we ignore warnings from passing verify_integrity=False
# TODO: If we knew what was going in to **d, we might be able to
# go through _simple_new instead
warnings.simplefilter("ignore")
result = cls.__new__(cls, verify_integrity=False, **d)

if tz is not None:
result = result.tz_localize('UTC').tz_convert(tz)
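For reference, the round-trip _new_DatetimeIndex exists to support: pickling stores the index's data and metadata, and unpickling rebuilds it through this function without re-running the costly validation.

import pickle

import pandas as pd

dti = pd.date_range("2018-01-01", periods=3, freq="D", tz="US/Eastern")
restored = pickle.loads(pickle.dumps(dti))
assert restored.equals(dti)
assert restored.freq == dti.freq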
@@ -260,81 +260,12 @@ def __new__(cls, data=None,
if name is None and hasattr(data, 'name'):
name = data.name

- freq, freq_infer = dtl.maybe_infer_freq(freq)
- if freq is None and hasattr(data, "freq"):
-     # i.e. DatetimeArray/Index
-     freq = data.freq
-     verify_integrity = False

- # if dtype has an embedded tz, capture it
- tz = dtl.validate_tz_from_dtype(dtype, tz)

- if not hasattr(data, "dtype"):
-     # e.g. list, tuple
-     if np.ndim(data) == 0:
-         # i.e. generator
-         data = list(data)
-     data = np.asarray(data)
-     copy = False
- elif isinstance(data, ABCSeries):
-     data = data._values

- # By this point we are assured to have either a numpy array or Index
- data, copy = maybe_convert_dtype(data, copy)

- if is_object_dtype(data) or is_string_dtype(data):
-     # TODO: We do not have tests specific to string-dtypes,
-     # also complex or categorical or other extension
-     copy = False
-     if lib.infer_dtype(data) == 'integer':
-         data = data.astype(np.int64)
-     else:
-         # data comes back here as either i8 to denote UTC timestamps
-         # or M8[ns] to denote wall times
-         data, inferred_tz = objects_to_datetime64ns(
-             data, dayfirst=dayfirst, yearfirst=yearfirst)
-         tz = maybe_infer_tz(tz, inferred_tz)

- if is_datetime64tz_dtype(data):
-     tz = maybe_infer_tz(tz, data.tz)
-     subarr = data._data

- elif is_datetime64_dtype(data):
-     # tz-naive DatetimeArray/Index or ndarray[datetime64]
-     data = getattr(data, "_data", data)
-     if data.dtype != _NS_DTYPE:
-         data = conversion.ensure_datetime64ns(data)

-     if tz is not None:
-         # Convert tz-naive to UTC
-         tz = timezones.maybe_get_tz(tz)
-         data = conversion.tz_localize_to_utc(data.view('i8'), tz,
-                                              ambiguous=ambiguous)
-     subarr = data.view(_NS_DTYPE)
dtarr = DatetimeArray._from_sequence(
data, dtype=dtype, copy=copy, tz=tz, freq=freq,
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)

- else:
-     # must be integer dtype otherwise
-     # assume this data are epoch timestamps
-     if data.dtype != _INT64_DTYPE:
-         data = data.astype(np.int64, copy=False)
-     subarr = data.view(_NS_DTYPE)

- assert isinstance(subarr, np.ndarray), type(subarr)
- assert subarr.dtype == 'M8[ns]', subarr.dtype

- subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
- if dtype is not None:
-     if not is_dtype_equal(subarr.dtype, dtype):
-         # dtype must be coerced to DatetimeTZDtype above
-         if subarr.tz is not None:
-             raise ValueError("cannot localize from non-UTC data")

- if verify_integrity and len(subarr) > 0:
-     if freq is not None and not freq_infer:
-         cls._validate_frequency(subarr, freq, ambiguous=ambiguous)

- if freq_infer:
-     subarr.freq = to_offset(subarr.inferred_freq)
subarr = cls._simple_new(dtarr._data, name=name,
freq=dtarr.freq, tz=dtarr.tz)

return subarr._deepcopy_if_needed(ref_to_data, copy)
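With __new__ reduced to this delegation, the index and array constructors share a single coercion path; a quick equivalence check (an illustration using the internal import aliased above, not from the PR):

import pandas as pd

from pandas.core.arrays.datetimes import DatetimeArrayMixin as DatetimeArray

data = ["2018-11-30", "2018-12-01"]
dti = pd.DatetimeIndex(data, tz="UTC")
dta = DatetimeArray._from_sequence(data, tz="UTC")
assert (dti.asi8 == dta.asi8).all()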

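And the copy semantics that the final _deepcopy_if_needed call preserves: with copy=True the resulting index must not share memory with the caller's buffer (a sketch, not from the PR):

import numpy as np
import pandas as pd

base = np.array(["2018-01-01", "2018-01-02"], dtype="M8[ns]")
dti = pd.DatetimeIndex(base, copy=True)
assert not np.shares_memory(dti.values, base)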