Skip to content

Commit 4d16e70

Browse files
authored
BUG: frame[object].astype(M8[unsupported]) not raising (#50015)
1 parent 60ed993 commit 4d16e70

File tree

6 files changed, with 39 additions and 29 deletions.

pandas/core/dtypes/astype.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,15 @@ def astype_nansafe(
135135
elif is_object_dtype(arr.dtype):
136136

137137
# if we have a datetime/timedelta array of objects
138-
# then coerce to a proper dtype and recall astype_nansafe
138+
# then coerce to datetime64[ns] and use DatetimeArray.astype
139139

140140
if is_datetime64_dtype(dtype):
141141
from pandas import to_datetime
142142

143-
return astype_nansafe(
144-
to_datetime(arr.ravel()).values.reshape(arr.shape),
145-
dtype,
146-
copy=copy,
147-
)
143+
dti = to_datetime(arr.ravel())
144+
dta = dti._data.reshape(arr.shape)
145+
return dta.astype(dtype, copy=False)._ndarray
146+
148147
elif is_timedelta64_dtype(dtype):
149148
# bc we know arr.dtype == object, this is equivalent to
150149
# `np.asarray(to_timedelta(arr))`, but using a lower-level API that

pandas/tests/dtypes/test_common.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,13 @@ def test_astype_datetime64_bad_dtype_raises(from_type, to_type):
757757

758758
to_type = np.dtype(to_type)
759759

760-
with pytest.raises(TypeError, match="cannot astype"):
760+
msg = "|".join(
761+
[
762+
"cannot astype a timedelta",
763+
"cannot astype a datetimelike",
764+
]
765+
)
766+
with pytest.raises(TypeError, match=msg):
761767
astype_nansafe(arr, dtype=to_type)
762768

763769

pandas/tests/frame/methods/test_astype.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,16 @@ def test_astype_column_metadata(self, dtype):
377377
df = df.astype(dtype)
378378
tm.assert_index_equal(df.columns, columns)
379379

380+
@pytest.mark.parametrize("unit", ["Y", "M", "W", "D", "h", "m"])
381+
def test_astype_from_object_to_datetime_unit(self, unit):
382+
vals = [
383+
["2015-01-01", "2015-01-02", "2015-01-03"],
384+
["2017-01-01", "2017-01-02", "2017-02-03"],
385+
]
386+
df = DataFrame(vals, dtype=object)
387+
with pytest.raises(TypeError, match="Cannot cast"):
388+
df.astype(f"M8[{unit}]")
389+
380390
@pytest.mark.parametrize("dtype", ["M8", "m8"])
381391
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
382392
def test_astype_from_datetimelike_to_object(self, dtype, unit):

pandas/tests/frame/test_constructors.py

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1955,19 +1955,11 @@ def test_constructor_datetimes_with_nulls(self, arr):
19551955

19561956
@pytest.mark.parametrize("order", ["K", "A", "C", "F"])
19571957
@pytest.mark.parametrize(
1958-
"dtype",
1959-
[
1960-
"datetime64[M]",
1961-
"datetime64[D]",
1962-
"datetime64[h]",
1963-
"datetime64[m]",
1964-
"datetime64[s]",
1965-
"datetime64[ms]",
1966-
"datetime64[us]",
1967-
"datetime64[ns]",
1968-
],
1958+
"unit",
1959+
["M", "D", "h", "m", "s", "ms", "us", "ns"],
19691960
)
1970-
def test_constructor_datetimes_non_ns(self, order, dtype):
1961+
def test_constructor_datetimes_non_ns(self, order, unit):
1962+
dtype = f"datetime64[{unit}]"
19711963
na = np.array(
19721964
[
19731965
["2015-01-01", "2015-01-02", "2015-01-03"],
@@ -1977,13 +1969,16 @@ def test_constructor_datetimes_non_ns(self, order, dtype):
19771969
order=order,
19781970
)
19791971
df = DataFrame(na)
1980-
expected = DataFrame(
1981-
[
1982-
["2015-01-01", "2015-01-02", "2015-01-03"],
1983-
["2017-01-01", "2017-01-02", "2017-02-03"],
1984-
]
1985-
)
1986-
expected = expected.astype(dtype=dtype)
1972+
expected = DataFrame(na.astype("M8[ns]"))
1973+
if unit in ["M", "D", "h", "m"]:
1974+
with pytest.raises(TypeError, match="Cannot cast"):
1975+
expected.astype(dtype)
1976+
1977+
# instead the constructor casts to the closest supported reso, i.e. "s"
1978+
expected = expected.astype("datetime64[s]")
1979+
else:
1980+
expected = expected.astype(dtype=dtype)
1981+
19871982
tm.assert_frame_equal(df, expected)
19881983

19891984
@pytest.mark.parametrize("order", ["K", "A", "C", "F"])

pandas/tests/io/xml/test_xml_dtypes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,14 +128,14 @@ def test_dtypes_with_names(parser):
128128
df_result = read_xml(
129129
xml_dates,
130130
names=["Col1", "Col2", "Col3", "Col4"],
131-
dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64"},
131+
dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"},
132132
parser=parser,
133133
)
134134
df_iter = read_xml_iterparse(
135135
xml_dates,
136136
parser=parser,
137137
names=["Col1", "Col2", "Col3", "Col4"],
138-
dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64"},
138+
dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"},
139139
iterparse={"row": ["shape", "degrees", "sides", "date"]},
140140
)
141141

pandas/tests/reshape/merge/test_merge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -730,13 +730,13 @@ def test_other_datetime_unit(self, unit):
730730
ser = Series([None, None], index=[101, 102], name="days")
731731

732732
dtype = f"datetime64[{unit}]"
733-
df2 = ser.astype(dtype).to_frame("days")
734733

735734
if unit in ["D", "h", "m"]:
736735
# not supported so we cast to the nearest supported unit, seconds
737736
exp_dtype = "datetime64[s]"
738737
else:
739738
exp_dtype = dtype
739+
df2 = ser.astype(exp_dtype).to_frame("days")
740740
assert df2["days"].dtype == exp_dtype
741741

742742
result = df1.merge(df2, left_on="entity_id", right_index=True)

0 commit comments

Comments
 (0)