Skip to content

Commit 1c19eb2

Browse files
committed
separate out smaller parsing functions
1 parent b4b945a commit 1c19eb2

File tree

2 files changed

+170
-57
lines changed

2 files changed

+170
-57
lines changed

pandas/core/tools/datetimes.py

Lines changed: 149 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -236,40 +236,15 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
236236
require_iso8601 = not infer_datetime_format
237237
format = None
238238

239+
result = None
240+
tz_parsed = None
239241
try:
240-
result = None
241-
242242
if format is not None:
243-
# shortcut formatting here
244-
if format == '%Y%m%d':
245-
try:
246-
result = _attempt_YYYYMMDD(arg, errors=errors)
247-
except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
248-
raise ValueError("cannot convert the input to "
249-
"'%Y%m%d' date format")
243+
result = _parse_with_format(arg, tz, name, box, format,
244+
errors, exact, infer_datetime_format)
250245

251-
# fallback
252-
if result is None:
253-
try:
254-
result, timezones = array_strptime(
255-
arg, format, exact=exact, errors=errors)
256-
if '%Z' in format or '%z' in format:
257-
return _return_parsed_timezone_results(
258-
result, timezones, box, tz, name)
259-
except tslibs.OutOfBoundsDatetime:
260-
if errors == 'raise':
261-
raise
262-
result = arg
263-
except ValueError:
264-
# if format was inferred, try falling back
265-
# to array_to_datetime - terminate here
266-
# for specified formats
267-
if not infer_datetime_format:
268-
if errors == 'raise':
269-
raise
270-
result = arg
271-
272-
if result is None and (format is None or infer_datetime_format):
246+
if result is None:
247+
assert format is None or infer_datetime_format
273248
result, tz_parsed = tslib.array_to_datetime(
274249
arg,
275250
errors=errors,
@@ -278,35 +253,152 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
278253
yearfirst=yearfirst,
279254
require_iso8601=require_iso8601
280255
)
281-
if tz_parsed is not None:
282-
if box:
283-
# We can take a shortcut since the datetime64 numpy array
284-
# is in UTC
285-
return DatetimeIndex._simple_new(result, name=name,
286-
tz=tz_parsed)
287-
else:
288-
# Convert the datetime64 numpy array to an numpy array
289-
# of datetime objects
290-
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
291-
for ts in result]
292-
return np.array(result, dtype=object)
293-
294-
if box:
295-
# Ensure we return an Index in all cases where box=True
296-
if is_datetime64_dtype(result):
297-
return DatetimeIndex(result, tz=tz, name=name)
298-
elif is_object_dtype(result):
299-
# e.g. an Index of datetime objects
300-
from pandas import Index
301-
return Index(result, name=name)
302-
return result
303256

304257
except ValueError as e:
258+
return _parse_fallback(arg, name, tz, e)
259+
else:
260+
return _maybe_box_date_results(result, box, tz, name, tz_parsed)
261+
262+
263+
def _parse_fallback(data, name, tz, err):
    """
    Last-resort conversion used once string parsing has raised ValueError.

    Hands ``data`` to ``conversion.datetime_to_datetime64`` and wraps what
    comes back in a DatetimeIndex.  If that conversion fails as well, the
    error from the earlier parsing attempt is re-raised instead of the new
    one.

    Parameters
    ----------
    data : np.ndarray[object]
    name : object
        Name to attach to the returned DatetimeIndex.
    tz : None, str, or tzinfo object
        Accepted for signature compatibility but not consulted: the timezone
        attached to the result is the one returned by
        ``conversion.datetime_to_datetime64``.
    err : ValueError instance
        The exception raised by the earlier parsing attempt.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    ValueError : ``err`` itself, if ``datetime_to_datetime64`` (or the
        DatetimeIndex construction) raises ValueError or TypeError.
    """
    from pandas import DatetimeIndex

    try:
        converted, inferred_tz = conversion.datetime_to_datetime64(data)
        boxed = DatetimeIndex._simple_new(converted, name=name,
                                          tz=inferred_tz)
    except (ValueError, TypeError):
        # surface the original parsing failure, not the fallback's
        raise err
    return boxed
290+
291+
292+
def _parse_with_format(data, tz, name, box, fmt,
                       errors, exact, infer_datetime_format):
    """
    Parse ``data`` according to an explicit strftime-style format string.

    The '%Y%m%d' format is first tried through ``_attempt_YYYYMMDD``; if
    that is not applicable or yields None, ``array_strptime`` is used.

    Parameters
    ----------
    data : np.ndarray[object]
    tz : {None, 'utc'}
        Only forwarded to ``_return_parsed_timezone_results`` when ``fmt``
        contains '%Z' or '%z'.
    name : object
        Only forwarded to ``_return_parsed_timezone_results``.
    box : bool
        Whether to wrap the results in an Index; only forwarded to
        ``_return_parsed_timezone_results``.
    fmt : str
        strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
        all the way up to nanoseconds.
    errors : {'ignore', 'raise', 'coerce'}
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaT
        - If 'ignore', then invalid parsing will return the input
    exact : bool
        - If True, require an exact format match.
        - If False, allow the format to match anywhere in the target string.
    infer_datetime_format : bool
        If True, a ValueError from ``array_strptime`` is swallowed so that
        the caller can try another parser.

    Returns
    -------
    result : np.ndarray, Index, or None
        The parsed values; ``data`` unchanged when parsing failed and
        ``errors`` is not 'raise'; None when ``array_strptime`` itself
        raised ValueError and ``infer_datetime_format`` is True.

    Raises
    ------
    ValueError : Data cannot be parsed using the given format.
    """
    parsed = None

    if fmt == '%Y%m%d':
        # dedicated fast path for this one format
        try:
            parsed = _attempt_YYYYMMDD(data, errors=errors)
        except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
            raise ValueError("cannot convert the input to "
                             "'%Y%m%d' date format")

    if parsed is not None:
        return parsed

    # general strptime-based parsing
    try:
        parsed, timezones = array_strptime(data, fmt,
                                           exact=exact, errors=errors)
        if '%Z' in fmt or '%z' in fmt:
            return _return_parsed_timezone_results(parsed, timezones,
                                                   box, tz, name)
    except tslibs.OutOfBoundsDatetime:
        if errors == 'raise':
            raise
        return data
    except ValueError:
        if infer_datetime_format:
            # the format was inferred: leave the result as-is so the
            # caller can fall back to array_to_datetime
            return parsed
        # a user-specified format terminates here
        if errors == 'raise':
            raise
        return data

    return parsed
355+
356+
357+
def _maybe_box_date_results(result, box, tz, name, tz_parsed=None):
358+
"""
359+
If requested, wrap the parsing results in an Index object, DatetimeIndex
360+
if possible.
361+
362+
Parameters
363+
----------
364+
result : np.ndarray[object], np.ndarray[int64], or Index
365+
box : bool
366+
tz : {None, 'utc'}
367+
name : str
368+
tz_parsed : None or tzinfo
369+
pytz tzinfo object inferred during parsing
370+
371+
Returns
372+
-------
373+
result : np.ndarray, Index, or DatetimeIndex
374+
"""
375+
from pandas import Index, DatetimeIndex
376+
377+
if isinstance(result, Index):
378+
# already boxed by e.g. _return_parsed_timezone_results
379+
return result
380+
381+
if tz_parsed is not None:
382+
if box:
383+
# We can take a shortcut since the datetime64 numpy array
384+
# is in UTC
385+
return DatetimeIndex._simple_new(result, name=name,
386+
tz=tz_parsed)
387+
else:
388+
# Convert the datetime64 numpy array to an numpy array
389+
# of datetime objects
390+
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
391+
for ts in result]
392+
return np.array(result, dtype=object)
393+
394+
if box:
395+
# Ensure we return an Index in all cases where box=True
396+
if is_datetime64_dtype(result):
397+
return DatetimeIndex(result, tz=tz, name=name)
398+
elif is_object_dtype(result):
399+
# e.g. an Index of datetime objects
400+
return Index(result, name=name)
401+
return result
310402

311403

312404
def _adjust_to_origin(arg, origin, unit):

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,27 @@ def test_to_datetime_parse_timezone_keeps_name(self):
246246

247247

248248
class TestToDatetime(object):
249+
def test_to_datetime_format_typeerror_fallback(self):
    """
    GH#23702: passing ``format`` together with non-string inputs falls
    back to interpreting the values as datetime objects.
    """
    stamp = Timestamp.now()

    # a lone tz-aware Timestamp is converted via the fallback path
    aware = np.array([stamp.tz_localize('Asia/Tokyo')], dtype=np.object_)
    boxed = to_datetime(aware, format="%Y%m%d")
    tm.assert_index_equal(boxed, DatetimeIndex([stamp], tz='Asia/Tokyo'))

    # FIXME: flaky test; this does NOT raise on OSX py27
    mixed = np.array([stamp, "2018-11-12"], dtype=np.object_)
    with pytest.raises(ValueError):
        to_datetime(mixed, format="%Y%m%d", infer_datetime_format=True)

    # without infer_datetime_format, we fall back to
    # datetime_to_datetime64 but not array_to_datetime,
    # and so raise on seeing a string
    with pytest.raises(ValueError):
        to_datetime(mixed, format="%Y%m%d")
269+
249270
def test_to_datetime_pydatetime(self):
250271
actual = pd.to_datetime(datetime(2008, 1, 15))
251272
assert actual == datetime(2008, 1, 15)

0 commit comments

Comments
 (0)