-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
PERF: fix assert_frame_equal can be very slow #38202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5970d41
0be8f99
fd5dc65
e8fe687
fbfbcdd
f778a7d
e0cf672
59a178d
82cb7a3
932ec28
bf18386
59bc277
3401d8d
62058bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1294,6 +1294,8 @@ def assert_series_equal( | |
rtol=1.0e-5, | ||
atol=1.0e-8, | ||
obj="Series", | ||
*, | ||
check_index=True, | ||
): | ||
""" | ||
Check that left and right Series are equal. | ||
|
@@ -1353,6 +1355,10 @@ def assert_series_equal( | |
obj : str, default 'Series' | ||
Specify object name being compared, internally used to show appropriate | ||
assertion message. | ||
check_index : bool, default True | ||
Whether to check index equivalence. If False, then compare only values. | ||
|
||
.. versionadded:: 1.3.0 | ||
|
||
Examples | ||
-------- | ||
|
@@ -1388,18 +1394,20 @@ def assert_series_equal( | |
if check_flags: | ||
assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" | ||
|
||
# index comparison | ||
assert_index_equal( | ||
left.index, | ||
right.index, | ||
exact=check_index_type, | ||
check_names=check_names, | ||
check_exact=check_exact, | ||
check_categorical=check_categorical, | ||
rtol=rtol, | ||
atol=atol, | ||
obj=f"{obj}.index", | ||
) | ||
if check_index: | ||
# GH #38183 | ||
assert_index_equal( | ||
left.index, | ||
right.index, | ||
exact=check_index_type, | ||
check_names=check_names, | ||
check_exact=check_exact, | ||
check_categorical=check_categorical, | ||
rtol=rtol, | ||
atol=atol, | ||
obj=f"{obj}.index", | ||
) | ||
|
||
if check_freq and isinstance(left.index, (pd.DatetimeIndex, pd.TimedeltaIndex)): | ||
lidx = left.index | ||
ridx = right.index | ||
|
@@ -1704,6 +1712,10 @@ def assert_frame_equal( | |
assert col in right | ||
lcol = left.iloc[:, i] | ||
rcol = right.iloc[:, i] | ||
# GH #38183 | ||
# use check_index=False, because we do not want to run | ||
# assert_index_equal for each column, | ||
# as we already checked it for the whole dataframe before. | ||
assert_series_equal( | ||
lcol, | ||
rcol, | ||
|
@@ -1717,6 +1729,7 @@ def assert_frame_equal( | |
obj=f'{obj}.iloc[:, {i}] (column name="{col}")', | ||
rtol=rtol, | ||
atol=atol, | ||
check_index=False, | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we have a lot of kwargs already. would it be viable to call assert_(ea|numpy)_array_equal on lcol._values and rcol._values? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jbrockmendel, I am afraid I can only suggest the following approach for now. Split [code reference missing from the extracted text]. Then we can use [code reference missing from the extracted text]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. works for me There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hmm i actually liked your prior implementation ( |
||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
marginally faster to do
right._ixs(i, axis=1)