Skip to content

Commit afb1bee

Browse files
committed
Merge remote-tracking branch 'upstream/master' into 24986-nested-array
2 parents 86948a1 + 145ade2 commit afb1bee

19 files changed

+316
-134
lines changed

doc/source/user_guide/io.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,36 @@ a single date rather than the entire array.
989989
990990
os.remove('tmp.csv')
991991
992+
993+
.. _io.csv.mixed_timezones:
994+
995+
Parsing a CSV with mixed timezones
996+
++++++++++++++++++++++++++++++++++
997+
998+
Pandas cannot natively represent a column or index with mixed timezones. If your CSV
999+
file contains columns with a mixture of timezones, the default result will be
1000+
an object-dtype column with strings, even with ``parse_dates``.
1001+
1002+
1003+
.. ipython:: python
1004+
1005+
content = """\
1006+
a
1007+
2000-01-01T00:00:00+05:00
1008+
2000-01-01T00:00:00+06:00"""
1009+
df = pd.read_csv(StringIO(content), parse_dates=['a'])
1010+
df['a']
1011+
1012+
To parse the mixed-timezone values as a datetime column, pass a partially-applied
1013+
:func:`to_datetime` with ``utc=True`` as the ``date_parser``.
1014+
1015+
.. ipython:: python
1016+
1017+
df = pd.read_csv(StringIO(content), parse_dates=['a'],
1018+
date_parser=lambda col: pd.to_datetime(col, utc=True))
1019+
df['a']
1020+
1021+
9921022
.. _io.dayfirst:
9931023

9941024

doc/source/whatsnew/v0.24.0.rst

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,52 @@ that the dates have been converted to UTC
648648
pd.to_datetime(["2015-11-18 15:30:00+05:30",
649649
"2015-11-18 16:30:00+06:30"], utc=True)
650650
651+
652+
.. _whatsnew_0240.api_breaking.read_csv_mixed_tz:
653+
654+
Parsing mixed-timezones with :func:`read_csv`
655+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
656+
657+
:func:`read_csv` no longer silently converts mixed-timezone columns to UTC (:issue:`24987`).
658+
659+
*Previous Behavior*
660+
661+
.. code-block:: python
662+
663+
>>> import io
664+
>>> content = """\
665+
... a
666+
... 2000-01-01T00:00:00+05:00
667+
... 2000-01-01T00:00:00+06:00"""
668+
>>> df = pd.read_csv(io.StringIO(content), parse_dates=['a'])
669+
>>> df.a
670+
0 1999-12-31 19:00:00
671+
1 1999-12-31 18:00:00
672+
Name: a, dtype: datetime64[ns]
673+
674+
*New Behavior*
675+
676+
.. ipython:: python
677+
678+
import io
679+
content = """\
680+
a
681+
2000-01-01T00:00:00+05:00
682+
2000-01-01T00:00:00+06:00"""
683+
df = pd.read_csv(io.StringIO(content), parse_dates=['a'])
684+
df.a
685+
686+
As can be seen, the ``dtype`` is object; each value in the column is a string.
687+
To convert the strings to an array of datetimes, use the ``date_parser`` argument:
688+
689+
.. ipython:: python
690+
691+
df = pd.read_csv(io.StringIO(content), parse_dates=['a'],
692+
date_parser=lambda col: pd.to_datetime(col, utc=True))
693+
df.a
694+
695+
See :ref:`whatsnew_0240.api.timezone_offset_parsing` for more.
696+
651697
.. _whatsnew_0240.api_breaking.period_end_time:
652698

653699
Time values in ``dt.end_time`` and ``to_timestamp(how='end')``

doc/source/whatsnew/v0.24.1.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ Bug Fixes
7474

7575
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
7676

77+
**Visualization**
78+
79+
- Fixed a bug where the warning for implicitly registered matplotlib converters was not shown. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`).
80+
81+
7782
**Other**
7883

7984
-

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2058,7 +2058,7 @@ def validate_tz_from_dtype(dtype, tz):
20582058
# tz-naive dtype (i.e. datetime64[ns])
20592059
if tz is not None and not timezones.tz_compare(tz, dtz):
20602060
raise ValueError("cannot supply both a tz and a "
2061-
"timezone-naive dtype (i.e. datetime64[ns]")
2061+
"timezone-naive dtype (i.e. datetime64[ns])")
20622062

20632063
return tz
20642064

pandas/io/parsers.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,14 @@
203203
* dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
204204
result 'foo'
205205
206-
If a column or index contains an unparseable date, the entire column or
207-
index will be returned unaltered as an object data type. For non-standard
208-
datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``
206+
If a column or index cannot be represented as an array of datetimes,
207+
say because of an unparseable value or a mixture of timezones, the column
208+
or index will be returned unaltered as an object data type. For
209+
non-standard datetime parsing, use ``pd.to_datetime`` after
210+
``pd.read_csv``. To parse an index or column with a mixture of timezones,
211+
specify ``date_parser`` to be a partially-applied
212+
:func:`pandas.to_datetime` with ``utc=True``. See
213+
:ref:`io.csv.mixed_timezones` for more.
209214
210215
Note: A fast-path exists for iso8601-formatted dates.
211216
infer_datetime_format : bool, default False

pandas/plotting/_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
else:
4040
_HAS_MPL = True
4141
if get_option('plotting.matplotlib.register_converters'):
42-
_converter.register(explicit=True)
42+
_converter.register(explicit=False)
4343

4444

4545
def _raise_if_no_mpl():

pandas/tests/indexes/common.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,12 @@ def setup_indices(self):
3030

3131
def test_pickle_compat_construction(self):
3232
# need an object to create with
33-
pytest.raises(TypeError, self._holder)
33+
msg = (r"Index\(\.\.\.\) must be called with a collection of some"
34+
r" kind, None was passed|"
35+
r"__new__\(\) missing 1 required positional argument: 'data'|"
36+
r"__new__\(\) takes at least 2 arguments \(1 given\)")
37+
with pytest.raises(TypeError, match=msg):
38+
self._holder()
3439

3540
def test_to_series(self):
3641
# assert that we are creating a copy of the index
@@ -84,8 +89,11 @@ def test_shift(self):
8489

8590
# GH8083 test the base class for shift
8691
idx = self.create_index()
87-
pytest.raises(NotImplementedError, idx.shift, 1)
88-
pytest.raises(NotImplementedError, idx.shift, 1, 2)
92+
msg = "Not supported for type {}".format(type(idx).__name__)
93+
with pytest.raises(NotImplementedError, match=msg):
94+
idx.shift(1)
95+
with pytest.raises(NotImplementedError, match=msg):
96+
idx.shift(1, 2)
8997

9098
def test_create_index_existing_name(self):
9199

pandas/tests/indexes/datetimes/test_construction.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,10 @@ def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
135135
tm.assert_index_equal(i2, expected)
136136

137137
# incompat tz/dtype
138-
pytest.raises(ValueError, lambda: DatetimeIndex(
139-
i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific'))
138+
msg = "cannot supply both a tz and a dtype with a tz"
139+
with pytest.raises(ValueError, match=msg):
140+
DatetimeIndex(i.tz_localize(None).asi8,
141+
dtype=i.dtype, tz='US/Pacific')
140142

141143
def test_construction_index_with_mixed_timezones(self):
142144
# gh-11488: no tz results in DatetimeIndex
@@ -439,14 +441,19 @@ def test_constructor_coverage(self):
439441
tm.assert_index_equal(from_ints, expected)
440442

441443
# non-conforming
442-
pytest.raises(ValueError, DatetimeIndex,
443-
['2000-01-01', '2000-01-02', '2000-01-04'], freq='D')
444+
msg = ("Inferred frequency None from passed values does not conform"
445+
" to passed frequency D")
446+
with pytest.raises(ValueError, match=msg):
447+
DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'], freq='D')
444448

445-
pytest.raises(ValueError, date_range, start='2011-01-01',
446-
freq='b')
447-
pytest.raises(ValueError, date_range, end='2011-01-01',
448-
freq='B')
449-
pytest.raises(ValueError, date_range, periods=10, freq='D')
449+
msg = ("Of the four parameters: start, end, periods, and freq, exactly"
450+
" three must be specified")
451+
with pytest.raises(ValueError, match=msg):
452+
date_range(start='2011-01-01', freq='b')
453+
with pytest.raises(ValueError, match=msg):
454+
date_range(end='2011-01-01', freq='B')
455+
with pytest.raises(ValueError, match=msg):
456+
date_range(periods=10, freq='D')
450457

451458
@pytest.mark.parametrize('freq', ['AS', 'W-SUN'])
452459
def test_constructor_datetime64_tzformat(self, freq):
@@ -511,18 +518,20 @@ def test_constructor_dtype(self):
511518
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
512519
dtype='datetime64[ns, US/Eastern]')
513520

514-
pytest.raises(ValueError,
515-
lambda: DatetimeIndex(idx,
516-
dtype='datetime64[ns]'))
521+
msg = ("cannot supply both a tz and a timezone-naive dtype"
522+
r" \(i\.e\. datetime64\[ns\]\)")
523+
with pytest.raises(ValueError, match=msg):
524+
DatetimeIndex(idx, dtype='datetime64[ns]')
517525

518526
# this is effectively trying to convert tz's
519-
pytest.raises(TypeError,
520-
lambda: DatetimeIndex(idx,
521-
dtype='datetime64[ns, CET]'))
522-
pytest.raises(ValueError,
523-
lambda: DatetimeIndex(
524-
idx, tz='CET',
525-
dtype='datetime64[ns, US/Eastern]'))
527+
msg = ("data is already tz-aware US/Eastern, unable to set specified"
528+
" tz: CET")
529+
with pytest.raises(TypeError, match=msg):
530+
DatetimeIndex(idx, dtype='datetime64[ns, CET]')
531+
msg = "cannot supply both a tz and a dtype with a tz"
532+
with pytest.raises(ValueError, match=msg):
533+
DatetimeIndex(idx, tz='CET', dtype='datetime64[ns, US/Eastern]')
534+
526535
result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]')
527536
tm.assert_index_equal(idx, result)
528537

@@ -732,7 +741,9 @@ def test_from_freq_recreate_from_data(self, freq):
732741

733742
def test_datetimeindex_constructor_misc(self):
734743
arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04']
735-
pytest.raises(Exception, DatetimeIndex, arr)
744+
msg = r"(\(u?')?Unknown string format(:', 'Jn 3, 2005'\))?"
745+
with pytest.raises(ValueError, match=msg):
746+
DatetimeIndex(arr)
736747

737748
arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']
738749
idx1 = DatetimeIndex(arr)

pandas/tests/indexes/datetimes/test_date_range.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,10 @@ def test_compat_replace(self, f):
346346
def test_catch_infinite_loop(self):
347347
offset = offsets.DateOffset(minute=5)
348348
# blow up, don't loop forever
349-
pytest.raises(Exception, date_range, datetime(2011, 11, 11),
350-
datetime(2011, 11, 12), freq=offset)
349+
msg = "Offset <DateOffset: minute=5> did not increment date"
350+
with pytest.raises(ValueError, match=msg):
351+
date_range(datetime(2011, 11, 11), datetime(2011, 11, 12),
352+
freq=offset)
351353

352354
@pytest.mark.parametrize('periods', (1, 2))
353355
def test_wom_len(self, periods):

pandas/tests/indexes/datetimes/test_misc.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,9 @@ def test_datetimeindex_accessors(self):
190190
# Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
191191
bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
192192
dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
193-
pytest.raises(ValueError, lambda: dti.is_month_start)
193+
msg = "Custom business days is not supported by is_month_start"
194+
with pytest.raises(ValueError, match=msg):
195+
dti.is_month_start
194196

195197
dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
196198

pandas/tests/indexes/datetimes/test_ops.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,19 @@ def test_ops_properties_basic(self):
3737

3838
# sanity check that the behavior didn't change
3939
# GH#7206
40+
msg = "'Series' object has no attribute '{}'"
4041
for op in ['year', 'day', 'second', 'weekday']:
41-
pytest.raises(TypeError, lambda x: getattr(self.dt_series, op))
42+
with pytest.raises(AttributeError, match=msg.format(op)):
43+
getattr(self.dt_series, op)
4244

4345
# attribute access should still work!
4446
s = Series(dict(year=2000, month=1, day=10))
4547
assert s.year == 2000
4648
assert s.month == 1
4749
assert s.day == 10
48-
pytest.raises(AttributeError, lambda: s.weekday)
50+
msg = "'Series' object has no attribute 'weekday'"
51+
with pytest.raises(AttributeError, match=msg):
52+
s.weekday
4953

5054
def test_repeat_range(self, tz_naive_fixture):
5155
tz = tz_naive_fixture

pandas/tests/indexes/datetimes/test_partial_slicing.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,8 @@ def test_partial_slice(self):
170170
result = s['2005-1-1']
171171
assert result == s.iloc[0]
172172

173-
pytest.raises(Exception, s.__getitem__, '2004-12-31')
173+
with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
174+
s['2004-12-31']
174175

175176
def test_partial_slice_daily(self):
176177
rng = date_range(freq='H', start=datetime(2005, 1, 31), periods=500)
@@ -179,7 +180,8 @@ def test_partial_slice_daily(self):
179180
result = s['2005-1-31']
180181
tm.assert_series_equal(result, s.iloc[:24])
181182

182-
pytest.raises(Exception, s.__getitem__, '2004-12-31 00')
183+
with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
184+
s['2004-12-31 00']
183185

184186
def test_partial_slice_hourly(self):
185187
rng = date_range(freq='T', start=datetime(2005, 1, 1, 20, 0, 0),
@@ -193,7 +195,8 @@ def test_partial_slice_hourly(self):
193195
tm.assert_series_equal(result, s.iloc[:60])
194196

195197
assert s['2005-1-1 20:00'] == s.iloc[0]
196-
pytest.raises(Exception, s.__getitem__, '2004-12-31 00:15')
198+
with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
199+
s['2004-12-31 00:15']
197200

198201
def test_partial_slice_minutely(self):
199202
rng = date_range(freq='S', start=datetime(2005, 1, 1, 23, 59, 0),
@@ -207,7 +210,8 @@ def test_partial_slice_minutely(self):
207210
tm.assert_series_equal(result, s.iloc[:60])
208211

209212
assert s[Timestamp('2005-1-1 23:59:00')] == s.iloc[0]
210-
pytest.raises(Exception, s.__getitem__, '2004-12-31 00:00:00')
213+
with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
214+
s['2004-12-31 00:00:00']
211215

212216
def test_partial_slice_second_precision(self):
213217
rng = date_range(start=datetime(2005, 1, 1, 0, 0, 59,
@@ -255,7 +259,9 @@ def test_partial_slicing_dataframe(self):
255259
result = df['a'][ts_string]
256260
assert isinstance(result, np.int64)
257261
assert result == expected
258-
pytest.raises(KeyError, df.__getitem__, ts_string)
262+
msg = r"^'{}'$".format(ts_string)
263+
with pytest.raises(KeyError, match=msg):
264+
df[ts_string]
259265

260266
# Timestamp with resolution less precise than index
261267
for fmt in formats[:rnum]:
@@ -282,15 +288,20 @@ def test_partial_slicing_dataframe(self):
282288
result = df['a'][ts_string]
283289
assert isinstance(result, np.int64)
284290
assert result == 2
285-
pytest.raises(KeyError, df.__getitem__, ts_string)
291+
msg = r"^'{}'$".format(ts_string)
292+
with pytest.raises(KeyError, match=msg):
293+
df[ts_string]
286294

287295
# Not compatible with existing key
288296
# Should raise KeyError
289297
for fmt, res in list(zip(formats, resolutions))[rnum + 1:]:
290298
ts = index[1] + Timedelta("1 " + res)
291299
ts_string = ts.strftime(fmt)
292-
pytest.raises(KeyError, df['a'].__getitem__, ts_string)
293-
pytest.raises(KeyError, df.__getitem__, ts_string)
300+
msg = r"^'{}'$".format(ts_string)
301+
with pytest.raises(KeyError, match=msg):
302+
df['a'][ts_string]
303+
with pytest.raises(KeyError, match=msg):
304+
df[ts_string]
294305

295306
def test_partial_slicing_with_multiindex(self):
296307

@@ -316,11 +327,10 @@ def test_partial_slicing_with_multiindex(self):
316327

317328
# this is an IndexingError as we don't do partial string selection on
318329
# multi-levels.
319-
def f():
330+
msg = "Too many indexers"
331+
with pytest.raises(IndexingError, match=msg):
320332
df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')]
321333

322-
pytest.raises(IndexingError, f)
323-
324334
# GH 4294
325335
# partial slice on a series mi
326336
s = pd.DataFrame(np.random.rand(1000, 1000), index=pd.date_range(

0 commit comments

Comments
 (0)