Closed
Description
Doing a merge with how="outer"
on an integer key column against an all-NaN key column started failing.
This works in pandas 1.3.2:
df1 = pd.DataFrame({"key": [1, 2], "col1": [1, 2]})
df2 = pd.DataFrame({"key": [np.nan, np.nan], "col2": [3, 4]})
In [3]: df1.merge(df2, on="key", how="outer")
Out[3]:
   key  col1  col2
0  1.0   1.0   NaN
1  2.0   2.0   NaN
2  NaN   NaN   3.0
3  NaN   NaN   4.0
However, this started failing on master some time ago, and apparently now also in pandas 1.3.3:
In [4]: df1.merge(df2, on="key", how="outer")
---------------------------------------------------------------------------
IntCastingNaNError Traceback (most recent call last)
<ipython-input-4-d831b6be2611> in <module>
----> 1 df1.merge(df2, on="key", how="outer")
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
9189 from pandas.core.reshape.merge import merge
9190
-> 9191 return merge(
9192 self,
9193 right,
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
118 validate=validate,
119 )
--> 120 return op.get_result()
121
122
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/reshape/merge.py in get_result(self)
734 result = self._indicator_post_merge(result)
735
--> 736 self._maybe_add_join_keys(result, left_indexer, right_indexer)
737
738 self._maybe_restore_index_levels(result)
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/reshape/merge.py in _maybe_add_join_keys(self, result, left_indexer, right_indexer)
915
916 if result._is_label_reference(name):
--> 917 result[name] = Series(
918 key_col, dtype=result_dtype, index=result.index
919 )
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
381 if dtype is not None:
382 # astype copies
--> 383 data = data.astype(dtype)
384 else:
385 # GH#24096 we need to ensure the index remains immutable
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/indexes/numeric.py in astype(self, dtype, copy)
221 # TODO(jreback); this can change once we have an EA Index type
222 # GH 13149
--> 223 arr = astype_nansafe(self._values, dtype=dtype)
224 return Int64Index(arr, name=self.name)
225
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
1166
1167 elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):
-> 1168 return astype_float_to_int_nansafe(arr, dtype, copy)
1169
1170 elif is_object_dtype(arr):
~/miniconda3/envs/pandas13/lib/python3.9/site-packages/pandas/core/dtypes/cast.py in astype_float_to_int_nansafe(values, dtype, copy)
1211 """
1212 if not np.isfinite(values).all():
-> 1213 raise IntCastingNaNError(
1214 "Cannot convert non-finite values (NA or inf) to integer"
1215 )
IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer
This breaks GeoPandas' overlay
function (cf. geopandas/geopandas#2101).