Skip to content

Commit 02e2bae

Browse files
authored
BUG: resolution inference with NaT ints/floats/strings (#55981)
1 parent 171cbcd commit 02e2bae

File tree

4 files changed

+39
-16
lines changed

4 files changed

+39
-16
lines changed

pandas/_libs/lib.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2636,6 +2636,7 @@ def maybe_convert_objects(ndarray[object] objects,
26362636
tsobj = convert_to_tsobject(val, None, None, 0, 0)
26372637
tsobj.ensure_reso(NPY_FR_ns)
26382638
except OutOfBoundsDatetime:
2639+
# e.g. test_out_of_s_bounds_datetime64
26392640
seen.object_ = True
26402641
break
26412642
else:

pandas/_libs/tslib.pyx

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -450,12 +450,12 @@ cpdef array_to_datetime(
450450
Returns
451451
-------
452452
np.ndarray
453-
May be datetime64[ns] or object dtype
453+
May be datetime64[creso_unit] or object dtype
454454
tzinfo or None
455455
"""
456456
cdef:
457457
Py_ssize_t i, n = values.size
458-
object val, tz
458+
object val
459459
ndarray[int64_t] iresult
460460
npy_datetimestruct dts
461461
bint utc_convert = bool(utc)
@@ -467,7 +467,7 @@ cpdef array_to_datetime(
467467
_TSObject _ts
468468
float tz_offset
469469
set out_tzoffset_vals = set()
470-
tzinfo tz_out = None
470+
tzinfo tz, tz_out = None
471471
cnp.flatiter it = cnp.PyArray_IterNew(values)
472472
NPY_DATETIMEUNIT item_reso
473473
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
@@ -522,15 +522,14 @@ cpdef array_to_datetime(
522522

523523
elif is_integer_object(val) or is_float_object(val):
524524
# these must be ns unit by definition
525-
item_reso = NPY_FR_ns
526-
state.update_creso(item_reso)
527-
if infer_reso:
528-
creso = state.creso
529525

530526
if val != val or val == NPY_NAT:
531527
iresult[i] = NPY_NAT
532528
else:
533-
# we now need to parse this as if unit='ns'
529+
item_reso = NPY_FR_ns
530+
state.update_creso(item_reso)
531+
if infer_reso:
532+
creso = state.creso
534533
iresult[i] = cast_from_unit(val, "ns", out_reso=creso)
535534
state.found_other = True
536535

@@ -552,6 +551,16 @@ cpdef array_to_datetime(
552551
_ts = convert_str_to_tsobject(
553552
val, None, dayfirst=dayfirst, yearfirst=yearfirst
554553
)
554+
555+
if _ts.value == NPY_NAT:
556+
# e.g. "NaT" string or empty string, we do not consider
557+
# this as either tzaware or tznaive. See
558+
# test_to_datetime_with_empty_str_utc_false_format_mixed
559+
# We also do not update resolution inference based on this,
560+
# see test_infer_with_nat_int_float_str
561+
iresult[i] = _ts.value
562+
continue
563+
555564
item_reso = _ts.creso
556565
state.update_creso(item_reso)
557566
if infer_reso:
@@ -562,12 +571,7 @@ cpdef array_to_datetime(
562571
iresult[i] = _ts.value
563572

564573
tz = _ts.tzinfo
565-
if _ts.value == NPY_NAT:
566-
# e.g. "NaT" string or empty string, we do not consider
567-
# this as either tzaware or tznaive. See
568-
# test_to_datetime_with_empty_str_utc_false_format_mixed
569-
pass
570-
elif tz is not None:
574+
if tz is not None:
571575
# dateutil timezone objects cannot be hashed, so
572576
# store the UTC offsets in seconds instead
573577
nsecs = tz.utcoffset(None).total_seconds()
@@ -640,7 +644,7 @@ cpdef array_to_datetime(
640644
# Otherwise we can use the single reso that we encountered and avoid
641645
# a second pass.
642646
abbrev = npy_unit_to_abbrev(state.creso)
643-
result = iresult.view(f"M8[{abbrev}]")
647+
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
644648
return result, tz_out
645649

646650

pandas/_libs/tslibs/strptime.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ cdef _get_format_regex(str fmt):
240240

241241

242242
cdef class DatetimeParseState:
243-
def __cinit__(self, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns):
243+
def __cinit__(self, NPY_DATETIMEUNIT creso):
244244
# found_tz and found_naive are specifically about datetime/Timestamp
245245
# objects with and without tzinfos attached.
246246
self.found_tz = False

pandas/tests/tslibs/test_array_to_datetime.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,24 @@ def test_infer_heterogeneous(self):
8282
assert tz is None
8383
tm.assert_numpy_array_equal(result, expected[::-1])
8484

85+
@pytest.mark.parametrize(
86+
"item", [float("nan"), NaT.value, float(NaT.value), "NaT", ""]
87+
)
88+
def test_infer_with_nat_int_float_str(self, item):
89+
# floats/ints get inferred to nanos *unless* they are NaN/iNaT,
90+
# similarly, a NaT string gets treated like a NaT scalar (ignored for resolution)
91+
dt = datetime(2023, 11, 15, 15, 5, 6)
92+
93+
arr = np.array([dt, item], dtype=object)
94+
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
95+
assert tz is None
96+
expected = np.array([dt, np.datetime64("NaT")], dtype="M8[us]")
97+
tm.assert_numpy_array_equal(result, expected)
98+
99+
result2, tz2 = tslib.array_to_datetime(arr[::-1], creso=creso_infer)
100+
assert tz2 is None
101+
tm.assert_numpy_array_equal(result2, expected[::-1])
102+
85103

86104
class TestArrayToDatetimeWithTZResolutionInference:
87105
def test_array_to_datetime_with_tz_resolution(self):

0 commit comments

Comments
 (0)