Skip to content

CLN/TST: normalize test_frame_apply #40113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 78 additions & 74 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,20 @@ def int_frame_const_col():
def test_apply(float_frame):
with np.errstate(all="ignore"):
# ufunc
applied = float_frame.apply(np.sqrt)
tm.assert_series_equal(np.sqrt(float_frame["A"]), applied["A"])
result = np.sqrt(float_frame["A"])
expected = float_frame.apply(np.sqrt)["A"]
tm.assert_series_equal(result, expected)

# aggregator
applied = float_frame.apply(np.mean)
assert applied["A"] == np.mean(float_frame["A"])
result = float_frame.apply(np.mean)["A"]
expected = np.mean(float_frame["A"])
assert result == expected

d = float_frame.index[0]
applied = float_frame.apply(np.mean, axis=1)
assert applied[d] == np.mean(float_frame.xs(d))
assert applied.index is float_frame.index # want this
result = float_frame.apply(np.mean, axis=1)
expected = np.mean(float_frame.xs(d))
assert result[d] == expected
assert result.index is float_frame.index

# invalid axis
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])
Expand All @@ -58,42 +61,42 @@ def test_apply(float_frame):

# GH 9573
df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]})
df = df.apply(lambda ts: ts.astype("category"))
result = df.apply(lambda ts: ts.astype("category"))

assert df.shape == (4, 2)
assert isinstance(df["c0"].dtype, CategoricalDtype)
assert isinstance(df["c1"].dtype, CategoricalDtype)
assert result.shape == (4, 2)
assert isinstance(result["c0"].dtype, CategoricalDtype)
assert isinstance(result["c1"].dtype, CategoricalDtype)


def test_apply_axis1_with_ea():
# GH#36785
df = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]})
result = df.apply(lambda x: x, axis=1)
tm.assert_frame_equal(result, df)
# GH 36785
expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]})
result = expected.apply(lambda x: x, axis=1)
tm.assert_frame_equal(result, expected)


def test_apply_mixed_datetimelike():
# mixed datetimelike
# GH 7778
df = DataFrame(
expected = DataFrame(
{
"A": date_range("20130101", periods=3),
"B": pd.to_timedelta(np.arange(3), unit="s"),
}
)
result = df.apply(lambda x: x, axis=1)
tm.assert_frame_equal(result, df)
result = expected.apply(lambda x: x, axis=1)
tm.assert_frame_equal(result, expected)


def test_apply_empty(float_frame):
# empty
empty_frame = DataFrame()

applied = empty_frame.apply(np.sqrt)
assert applied.empty
result = empty_frame.apply(np.sqrt)
assert result.empty

applied = empty_frame.apply(np.mean)
assert applied.empty
result = empty_frame.apply(np.mean)
assert result.empty

no_rows = float_frame[:0]
result = no_rows.apply(lambda x: x.mean())
Expand All @@ -108,7 +111,7 @@ def test_apply_empty(float_frame):
# GH 2476
expected = DataFrame(index=["a"])
result = expected.apply(lambda x: x["a"], axis=1)
tm.assert_frame_equal(expected, result)
tm.assert_frame_equal(result, expected)


def test_apply_with_reduce_empty():
Expand Down Expand Up @@ -285,14 +288,13 @@ def _assert_raw(x):
float_frame.apply(_assert_raw, raw=True)
float_frame.apply(_assert_raw, axis=1, raw=True)

result0 = float_frame.apply(np.mean, raw=True)
result1 = float_frame.apply(np.mean, axis=1, raw=True)

expected0 = float_frame.apply(lambda x: x.values.mean())
expected1 = float_frame.apply(lambda x: x.values.mean(), axis=1)
result = float_frame.apply(np.mean, raw=True)
expected = float_frame.apply(lambda x: x.values.mean())
tm.assert_series_equal(result, expected)

tm.assert_series_equal(result0, expected0)
tm.assert_series_equal(result1, expected1)
result = float_frame.apply(np.mean, axis=1, raw=True)
expected = float_frame.apply(lambda x: x.values.mean(), axis=1)
tm.assert_series_equal(result, expected)

# no reduction
result = float_frame.apply(lambda x: x * 2, raw=True)
Expand All @@ -306,8 +308,9 @@ def _assert_raw(x):

def test_apply_axis1(float_frame):
d = float_frame.index[0]
tapplied = float_frame.apply(np.mean, axis=1)
assert tapplied[d] == np.mean(float_frame.xs(d))
result = float_frame.apply(np.mean, axis=1)[d]
expected = np.mean(float_frame.xs(d))
assert result == expected


def test_apply_mixed_dtype_corner():
Expand Down Expand Up @@ -401,27 +404,25 @@ def test_apply_reduce_to_dict():
# GH 25196 37544
data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"])

result0 = data.apply(dict, axis=0)
expected0 = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns)
tm.assert_series_equal(result0, expected0)
result = data.apply(dict, axis=0)
expected = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns)
tm.assert_series_equal(result, expected)

result1 = data.apply(dict, axis=1)
expected1 = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index)
tm.assert_series_equal(result1, expected1)
result = data.apply(dict, axis=1)
expected = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index)
tm.assert_series_equal(result, expected)


def test_apply_differently_indexed():
df = DataFrame(np.random.randn(20, 10))

result0 = df.apply(Series.describe, axis=0)
expected0 = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns)
tm.assert_frame_equal(result0, expected0)
result = df.apply(Series.describe, axis=0)
expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns)
tm.assert_frame_equal(result, expected)

result1 = df.apply(Series.describe, axis=1)
expected1 = DataFrame(
{i: v.describe() for i, v in df.T.items()}, columns=df.index
).T
tm.assert_frame_equal(result1, expected1)
result = df.apply(Series.describe, axis=1)
expected = DataFrame({i: v.describe() for i, v in df.T.items()}, columns=df.index).T
tm.assert_frame_equal(result, expected)


def test_apply_modify_traceback():
Expand Down Expand Up @@ -525,7 +526,7 @@ def f(r):


def test_apply_convert_objects():
data = DataFrame(
expected = DataFrame(
{
"A": [
"foo",
Expand Down Expand Up @@ -572,8 +573,8 @@ def test_apply_convert_objects():
}
)

result = data.apply(lambda x: x, axis=1)
tm.assert_frame_equal(result._convert(datetime=True), data)
result = expected.apply(lambda x: x, axis=1)._convert(datetime=True)
tm.assert_frame_equal(result, expected)


def test_apply_attach_name(float_frame):
Expand Down Expand Up @@ -635,17 +636,17 @@ def test_applymap(float_frame):
float_frame.applymap(type)

# GH 465: function returning tuples
result = float_frame.applymap(lambda x: (x, x))
assert isinstance(result["A"][0], tuple)
result = float_frame.applymap(lambda x: (x, x))["A"][0]
assert isinstance(result, tuple)

# GH 2909: object conversion to float in constructor?
df = DataFrame(data=[1, "a"])
result = df.applymap(lambda x: x)
assert result.dtypes[0] == object
result = df.applymap(lambda x: x).dtypes[0]
assert result == object

df = DataFrame(data=[1.0, "a"])
result = df.applymap(lambda x: x)
assert result.dtypes[0] == object
result = df.applymap(lambda x: x).dtypes[0]
assert result == object

# GH 2786
df = DataFrame(np.random.random((3, 4)))
Expand All @@ -672,10 +673,10 @@ def test_applymap(float_frame):
DataFrame(index=list("ABC")),
DataFrame({"A": [], "B": [], "C": []}),
]
for frame in empty_frames:
for expected in empty_frames:
for func in [round, lambda x: x]:
result = frame.applymap(func)
tm.assert_frame_equal(result, frame)
result = expected.applymap(func)
tm.assert_frame_equal(result, expected)


def test_applymap_na_ignore(float_frame):
Expand Down Expand Up @@ -743,7 +744,8 @@ def test_frame_apply_dont_convert_datetime64():
df = df.applymap(lambda x: x + BDay())
df = df.applymap(lambda x: x + BDay())

assert df.x1.dtype == "M8[ns]"
result = df.x1.dtype
assert result == "M8[ns]"


def test_apply_non_numpy_dtype():
Expand Down Expand Up @@ -786,16 +788,18 @@ def apply_list(row):


def test_apply_noreduction_tzaware_object():
# https://github.com/pandas-dev/pandas/issues/31505
df = DataFrame({"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]")
result = df.apply(lambda x: x)
tm.assert_frame_equal(result, df)
result = df.apply(lambda x: x.copy())
tm.assert_frame_equal(result, df)
# GH 31505
expected = DataFrame(
{"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]"
)
result = expected.apply(lambda x: x)
tm.assert_frame_equal(result, expected)
result = expected.apply(lambda x: x.copy())
tm.assert_frame_equal(result, expected)


def test_apply_function_runs_once():
# https://github.com/pandas-dev/pandas/issues/30815
# GH 30815

df = DataFrame({"a": [1, 2, 3]})
names = [] # Save row names function is applied to
Expand All @@ -815,7 +819,7 @@ def non_reducing_function(row):


def test_apply_raw_function_runs_once():
# https://github.com/pandas-dev/pandas/issues/34506
# GH 34506

df = DataFrame({"a": [1, 2, 3]})
values = [] # Save row values function is applied to
Expand Down Expand Up @@ -885,11 +889,11 @@ def test_infer_row_shape():
# GH 17437
# if row shape is changing, infer it
df = DataFrame(np.random.rand(10, 2))
result = df.apply(np.fft.fft, axis=0)
assert result.shape == (10, 2)
result = df.apply(np.fft.fft, axis=0).shape
assert result == (10, 2)

result = df.apply(np.fft.rfft, axis=0)
assert result.shape == (6, 2)
result = df.apply(np.fft.rfft, axis=0).shape
assert result == (6, 2)


def test_with_dictlike_columns():
Expand Down Expand Up @@ -1604,7 +1608,7 @@ def test_apply_dtype(col):


def test_apply_mutating():
# GH#35462 case where applied func pins a new BlockManager to a row
# GH 35462 case where applied func pins a new BlockManager to a row
df = DataFrame({"a": range(100), "b": range(100, 200)})

def func(row):
Expand All @@ -1623,7 +1627,7 @@ def func(row):


def test_apply_empty_list_reduce():
# GH#35683 get columns correct
# GH 35683 get columns correct
df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"])

result = df.apply(lambda x: [], result_type="reduce")
Expand All @@ -1643,7 +1647,7 @@ def test_apply_no_suffix_index():


def test_apply_raw_returns_string():
# https://github.com/pandas-dev/pandas/issues/35940
# GH 35940
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I personally don't find this an improvement. I know we are inconsistent about it, but if there is a full link, it's much easier to go to the issue

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is how we do it for every other comment that references an issue number.

If we want to require a full link, that's possible, but it would need a pre-commit hook.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't do that for "every other comment". We currently use the full link in 400 cases.

Not every detail needs to be controlled with a pre-commit hook

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And how many without a link? I don't know, but I would say it's way higher.

We have to have a standard and then enforce it.

We simply cannot track stylistic things any other way, or involve personal preference here (once we agree on a standard).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jorisvandenbossche - Thanks for the feedback here, I didn't realize there wasn't a consensus. I actually agree on the preference for a link (but also more strongly prefer consistency). I've reverted these changes.

Assuming we can get a consensus, if things were made consistent and we updated the dev docs, it seems to me that consistency would become the norm and not require enforcement after a short amount of time. If it doesn't turn out to be the case and becomes a hassle, we could then implement a pre-commit hook.

df = DataFrame({"A": ["aa", "bbb"]})
result = df.apply(lambda x: x[0], axis=1, raw=True)
expected = Series(["aa", "bbb"])
Expand Down