Skip to content

AmbiguousTimeError with set_index() #12920

Closed
@tomyun

Description

@tomyun

Code Sample, a copy-pastable example if possible

In [1]: import pandas as pd

In [2]: pd.__version__
Out[2]: '0.18.0'

In [3]: di = pd.date_range('2006-10-29 00:00:00', periods=3, freq='H', tz='US/Pacific')

In [4]: di
Out[4]:
DatetimeIndex(['2006-10-29 00:00:00-07:00', '2006-10-29 01:00:00-07:00',
               '2006-10-29 01:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='H')

In [5]: df = pd.DataFrame(data={'a': [0,1,2], 'b': [3,4,5]}, index=di).reset_index()

In [6]: df
Out[6]:
                      index  a  b
0 2006-10-29 00:00:00-07:00  0  3
1 2006-10-29 01:00:00-07:00  1  4
2 2006-10-29 01:00:00-08:00  2  5

In [7]: df.set_index('index')
Out[7]:
                           a  b
index
2006-10-29 00:00:00-07:00  0  3
2006-10-29 01:00:00-07:00  1  4
2006-10-29 01:00:00-08:00  2  5

In [8]: df.set_index(['index', 'a'])
---------------------------------------------------------------------------
AmbiguousTimeError                        Traceback (most recent call last)
<ipython-input-7-ecc11858b82b> in <module>()
----> 1 df.reset_index().set_index(['index', 'a'])

/usr/local/lib/python3.5/site-packages/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
   2835             arrays.append(level)
   2836
-> 2837         index = MultiIndex.from_arrays(arrays, names=names)
   2838
   2839         if verify_integrity and not index.is_unique:

/usr/local/lib/python3.5/site-packages/pandas/indexes/multi.py in from_arrays(cls, arrays, sortorder, names)
    835             return Index(arrays[0], name=name)
    836
--> 837         cats = [Categorical.from_array(arr, ordered=True) for arr in arrays]
    838         levels = [c.categories for c in cats]
    839         labels = [c.codes for c in cats]

/usr/local/lib/python3.5/site-packages/pandas/indexes/multi.py in <listcomp>(.0)
    835             return Index(arrays[0], name=name)
    836
--> 837         cats = [Categorical.from_array(arr, ordered=True) for arr in arrays]
    838         levels = [c.categories for c in cats]
    839         labels = [c.codes for c in cats]

/usr/local/lib/python3.5/site-packages/pandas/core/categorical.py in from_array(cls, data, **kwargs)
    377             the unique values of `data`.
    378         """
--> 379         return Categorical(data, **kwargs)
    380
    381     @classmethod

/usr/local/lib/python3.5/site-packages/pandas/core/categorical.py in __init__(self, values, categories, ordered, name, fastpath, levels)
    281         if categories is None:
    282             try:
--> 283                 codes, categories = factorize(values, sort=True)
    284             except TypeError:
    285                 codes, categories = factorize(values, sort=False)

/usr/local/lib/python3.5/site-packages/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
    230         # reset tz
    231         uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize(
--> 232             values.tz)
    233     elif is_datetime:
    234         uniques = uniques.astype('M8[ns]')

/usr/local/lib/python3.5/site-packages/pandas/util/decorators.py in wrapper(*args, **kwargs)
     89                 else:
     90                     kwargs[new_arg_name] = new_arg_value
---> 91             return func(*args, **kwargs)
     92         return wrapper
     93     return _deprecate_kwarg

/usr/local/lib/python3.5/site-packages/pandas/tseries/index.py in tz_localize(self, tz, ambiguous)
   1841
   1842             new_dates = tslib.tz_localize_to_utc(self.asi8, tz,
-> 1843                                                  ambiguous=ambiguous)
   1844         new_dates = new_dates.view(_NS_DTYPE)
   1845         return self._shallow_copy(new_dates, tz=tz)

pandas/tslib.pyx in pandas.tslib.tz_localize_to_utc (pandas/tslib.c:67354)()

AmbiguousTimeError: Cannot infer dst time from Timestamp('2006-10-29 01:00:00'), try using the 'ambiguous' argument

Expected Output

In [8]: df.set_index(['index', 'a'])
Out[8]:
                              b
index                      a
2006-10-29 00:00:00-07:00  0  3
2006-10-29 01:00:00-07:00  1  4
2006-10-29 01:00:00-08:00  2  5

Output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Darwin
OS-release: 15.4.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: en_US.UTF-8
LANG: en_US.UTF-8

pandas: 0.18.0
nose: 1.3.7
pip: 8.1.1
setuptools: 20.9.0
Cython: 0.24
numpy: 1.11.0
scipy: 0.17.0
statsmodels: 0.6.1
xarray: 0.7.0
IPython: 4.1.2
sphinx: 1.4.1
patsy: 0.4.1
dateutil: 2.5.2
pytz: 2016.3
blosc: None
bottleneck: None
tables: 3.2.2
numexpr: 2.5.2
matplotlib: 1.5.1
openpyxl: None
xlrd: 0.9.4
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Reshaping (Concat, Merge/Join, Stack/Unstack, Explode); Timezones (Timezone data dtype)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions