Skip to content

BUG: JoinUnit._is_valid_na_for #43043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@

import numpy as np

from pandas._libs import internals as libinternals
from pandas._libs import (
NaT,
internals as libinternals,
)
from pandas._libs.missing import NA
from pandas._typing import (
ArrayLike,
DtypeObj,
Expand All @@ -28,7 +32,7 @@
is_1d_only_ea_obj,
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.concat import (
cast_to_common_type,
Expand Down Expand Up @@ -374,13 +378,20 @@ def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
values = self.block.values
return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K"))

if self.dtype.kind == dtype.kind == "M" and not is_dtype_equal(
self.dtype, dtype
):
na_value = self.block.fill_value
if na_value is NaT and not is_dtype_equal(self.dtype, dtype):
# e.g. we are dt64 and other is td64
# fill_values match but we should not cast self.block.values to dtype
# TODO: this will need updating if we ever have non-nano dt64/td64
return False

na_value = self.block.fill_value
if na_value is NA and needs_i8_conversion(dtype):
# FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
# e.g. self.dtype == "Int64" and dtype is td64, we don't want
# to consider these as matching
return False

# TODO: better to use can_hold_element?
return is_valid_na_for_dtype(na_value, dtype)

@cache_readonly
Expand Down Expand Up @@ -426,9 +437,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
i8values = np.full(self.shape, fill_value.value)
return DatetimeArray(i8values, dtype=empty_dtype)

elif is_extension_array_dtype(blk_dtype):
pass

elif is_1d_only_ea_dtype(empty_dtype):
empty_dtype = cast(ExtensionDtype, empty_dtype)
cls = empty_dtype.construct_array_type()
Expand All @@ -440,11 +448,16 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
return missing_arr.take(
empty_arr, allow_fill=True, fill_value=fill_value
)
elif isinstance(empty_dtype, ExtensionDtype):
# TODO: no tests get here, a handful would if we disabled
# the dt64tz special-case above (which is faster)
cls = empty_dtype.construct_array_type()
missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype)
missing_arr[:] = fill_value
return missing_arr
else:
# NB: we should never get here with empty_dtype integer or bool;
# if we did, the missing_arr.fill would cast to gibberish
empty_dtype = cast(np.dtype, empty_dtype)

missing_arr = np.empty(self.shape, dtype=empty_dtype)
missing_arr.fill(fill_value)
return missing_arr
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/reshape/concat/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,12 @@ def test_append_empty_tz_frame_with_datetime64ns(self):
expected = DataFrame({"a": [pd.NaT]}).astype(object)
tm.assert_frame_equal(result, expected)

# mismatched tz
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]")
result = df.append(other, ignore_index=True)
expected = DataFrame({"a": [pd.NaT]}).astype(object)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
)
Expand Down