Skip to content

Commit 71eb2e7

Browse files
committed
BUG: Raise TypeError when joining with non-DataFrame using 'on=' (GH#61434)
1 parent 5b0767a commit 71eb2e7

File tree

3 files changed

+47
-0
lines changed

3 files changed

+47
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,7 @@ Reshaping
846846
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
847847
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
848848
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
849+
- Bug in :meth:`DataFrame.join` where passing a non-pandas object like a ``polars.DataFrame`` with the ``on=`` parameter raised a misleading error instead of a ``TypeError`` (:issue:`61434`)
849850

850851
Sparse
851852
^^^^^^

pandas/core/frame.py

+17
Original file line numberDiff line numberDiff line change
@@ -10885,6 +10885,23 @@ def join(
1088510885
raise ValueError("Other Series must have a name")
1088610886
other = DataFrame({other.name: other})
1088710887

10888+
if on is not None:
10889+
if isinstance(other, Iterable) and not isinstance(
10890+
other, (DataFrame, Series, str, bytes, bytearray)
10891+
):
10892+
if not all(isinstance(obj, (DataFrame, Series)) for obj in other):
10893+
raise TypeError(
10894+
f"Join with 'on={on}' requires a pandas DataFrame or Series, "
10895+
"or an iterable of such objects as 'other'. "
10896+
f"Got {type(other).__name__} with invalid elements."
10897+
)
10898+
elif not isinstance(other, (DataFrame, Series)):
10899+
raise TypeError(
10900+
f"Join with 'on={on}' requires a pandas DataFrame or Series as "
10901+
"'other'. Got "
10902+
f"{type(other).__name__} instead."
10903+
)
10904+
1088810905
if isinstance(other, DataFrame):
1088910906
if how == "cross":
1089010907
return merge(

pandas/tests/frame/methods/test_join.py

+29
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,35 @@ def test_suppress_future_warning_with_sort_kw(sort):
418418
tm.assert_frame_equal(result, expected)
419419

420420

421+
def test_join_with_invalid_non_pandas_objects_raises_typeerror():
422+
# GH#61434
423+
# case - 'other' is an invalid non-pandas object
424+
df1 = DataFrame(
425+
{
426+
"Column2": [10, 20, 30],
427+
"Column3": ["A", "B", "C"],
428+
"Column4": ["Lala", "YesYes", "NoNo"],
429+
}
430+
)
431+
432+
class FakeOther:
433+
def __init__(self):
434+
self.Column2 = [10, 20, 30]
435+
self.Column3 = ["A", "B", "C"]
436+
437+
invalid_other = FakeOther()
438+
439+
with pytest.raises(TypeError, match="requires a pandas DataFrame or Series"):
440+
df1.join(invalid_other, on=["Column2", "Column3"], how="inner")
441+
442+
# 'other' is an iterable with mixed types
443+
df2 = DataFrame({"Column2": [10, 20, 30], "Column3": ["A", "B", "C"]})
444+
mixed_iterable = [df2, 42]
445+
446+
with pytest.raises(TypeError, match="requires a pandas DataFrame or Series"):
447+
df1.join(mixed_iterable, on=["Column2", "Column3"], how="inner")
448+
449+
421450
class TestDataFrameJoin:
422451
def test_join(self, multiindex_dataframe_random_data):
423452
frame = multiindex_dataframe_random_data

0 commit comments

Comments
 (0)