Skip to content

Commit aae7d6e

Browse files
committed
BUG: fix read_csv to parse timezone-aware datetimes correctly
- make the CSV parsing handle the `DatetimeIndex` returned by `to_datetime` with `box=True`
1 parent 70e6f7c commit aae7d6e

File tree

3 files changed

+33
-8
lines changed

3 files changed

+33
-8
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,7 @@ I/O
673673

674674
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
675675
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
676+
- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
676677
-
677678

678679
Plotting

pandas/io/parsers.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
from pandas.core.dtypes.missing import isna
2626
from pandas.core.dtypes.cast import astype_nansafe
2727
from pandas.core.index import (Index, MultiIndex, RangeIndex,
28-
ensure_index_from_sequences)
28+
ensure_index_from_sequences,
29+
DatetimeIndex)
2930
from pandas.core.series import Series
3031
from pandas.core.frame import DataFrame
3132
from pandas.core.arrays import Categorical
@@ -1589,11 +1590,13 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
15891590
else:
15901591
# skip inference if specified dtype is object
15911592
try_num_bool = not (cast_type and is_string_dtype(cast_type))
1592-
1593-
# general type inference and conversion
1594-
cvals, na_count = self._infer_types(
1595-
values, set(col_na_values) | col_na_fvalues,
1596-
try_num_bool)
1593+
if isinstance(values, np.ndarray):
1594+
# general type inference and conversion
1595+
cvals, na_count = self._infer_types(
1596+
values, set(col_na_values) | col_na_fvalues,
1597+
try_num_bool)
1598+
else: # _infer_types only accepts ndarray.
1599+
cvals = values
15971600

15981601
# type specified in dtype param
15991602
if cast_type and not is_dtype_equal(cvals, cast_type):
@@ -3030,14 +3033,19 @@ def converter(*date_cols):
30303033
strs = _concat_date_cols(date_cols)
30313034

30323035
try:
3033-
return tools.to_datetime(
3036+
converted = tools.to_datetime(
30343037
ensure_object(strs),
30353038
utc=None,
3036-
box=False,
3039+
box=True,
30373040
dayfirst=dayfirst,
30383041
errors='ignore',
30393042
infer_datetime_format=infer_datetime_format
30403043
)
3044+
if not isinstance(converted, DatetimeIndex):
3045+
# GH-22256 : non-datetime Index needs to be
3046+
# converted to ndarray to avoid downstream errors
3047+
return np.array(converted)
3048+
return converted
30413049
except:
30423050
return tools.to_datetime(
30433051
parsing.try_parse_dates(strs, dayfirst=dayfirst))

pandas/tests/io/parser/parse_dates.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,3 +674,19 @@ def test_parse_date_float(self, data, expected, parse_dates):
674674
# (i.e. float precision should remain unchanged).
675675
result = self.read_csv(StringIO(data), parse_dates=parse_dates)
676676
tm.assert_frame_equal(result, expected)
677+
678+
def test_parse_timezone(self):
679+
import pytz
680+
data = """dt,val
681+
2018-01-04 09:01:00+09:00,23350
682+
2018-01-04 09:02:00+09:00,23400
683+
2018-01-04 09:03:00+09:00,23400
684+
2018-01-04 09:04:00+09:00,23400
685+
2018-01-04 09:05:00+09:00,23400"""
686+
parsed = self.read_csv(StringIO(data), parse_dates=['dt'])
687+
dti = pd.DatetimeIndex(start='2018-01-04 09:01:00',
688+
end='2018-01-04 09:05:00', freq='1min',
689+
tz=pytz.FixedOffset(540))
690+
expected_data = {'dt': dti, 'val': [23350, 23400, 23400, 23400, 23400]}
691+
expected = DataFrame(expected_data)
692+
tm.assert_frame_equal(parsed, expected)

0 commit comments

Comments
 (0)