-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: make `Styler` compatible with non-unique indexes (#41269)
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
1cc569f
6982554
a3694db
5c6669c
732c7d5
4c99130
57e8bef
a7a2966
19fb7f9
4ce559e
7f28111
5043c01
9fc6cd3
09764ba
9451aae
4faeb29
aed0536
3a8f11e
51233be
8454c5e
c3b7af8
20cd19f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,8 +82,6 @@ def __init__( | |
data = data.to_frame() | ||
if not isinstance(data, DataFrame): | ||
raise TypeError("``data`` must be a Series or DataFrame") | ||
if not data.index.is_unique or not data.columns.is_unique: | ||
raise ValueError("style is not supported for non-unique indices.") | ||
self.data: DataFrame = data | ||
self.index: Index = data.index | ||
self.columns: Index = data.columns | ||
|
@@ -495,9 +493,12 @@ def format( | |
escape=escape, | ||
) | ||
|
||
for row, value in data[[col]].itertuples(): | ||
i, j = self.index.get_loc(row), self.columns.get_loc(col) | ||
self._display_funcs[(i, j)] = format_func | ||
for row in data[[col]].itertuples(): | ||
i_ = self.index.get_indexer_for([row[0]]) # handle duplicate keys in | ||
j_ = self.columns.get_indexer_for([col]) # non-unique indexes | ||
Review comment: You can do this outside of the loop, right (as `col` doesn't change)? For `j_`, does this change perf at all? (I don't think so, but checking.)
Author reply: Good catch. The multiple loops really killed performance for the unique case (which is benchmarked), so I had to separate the non-unique and unique cases with a conditional; after that, performance was the same.
|
||
for i in i_: | ||
for j in j_: | ||
self._display_funcs[(i, j)] = format_func | ||
|
||
return self | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import pytest | ||
|
||
from pandas import ( | ||
DataFrame, | ||
IndexSlice, | ||
) | ||
|
||
pytest.importorskip("jinja2") | ||
|
||
from pandas.io.formats.style import Styler | ||
|
||
|
||
@pytest.fixture
def df():
    """Return a 3x3 float ``DataFrame`` with repeated row and column labels.

    The row label ``"j"`` and the column label ``"d"`` each appear twice, so
    neither axis of the frame is unique.
    """
    values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    row_labels = ["i", "j", "j"]
    col_labels = ["c", "d", "d"]
    return DataFrame(values, index=row_labels, columns=col_labels, dtype=float)
|
||
|
||
@pytest.fixture
def styler(df):
    """Return a ``Styler`` wrapping the ``df`` fixture, built with ``uuid_len=0``."""
    non_unique_styler = Styler(df, uuid_len=0)
    return non_unique_styler
|
||
|
||
def test_format_non_unique(df):
    """Check that ``Styler.format`` handles duplicated index/column labels.

    Each scenario renders the styled frame and looks for cell text (followed
    by ``<``) in the HTML: first the values expected in the default
    six-decimal form, then the values expected with one decimal place.
    """
    # GH 41269

    # test dict: the formatter is keyed on the duplicated column label "d"
    rendered = df.style.format({"d": "{:.1f}"}).render()
    default_cells = ["1.000000<", "4.000000<", "7.000000<"]
    one_decimal_cells = ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]
    for fragment in default_cells + one_decimal_cells:
        assert fragment in rendered

    # test subset: only the cells at row label "j" / column label "d"
    rendered = df.style.format(precision=1, subset=IndexSlice["j", "d"]).render()
    default_cells = ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]
    one_decimal_cells = ["5.0<", "6.0<", "8.0<", "9.0<"]
    for fragment in default_cells + one_decimal_cells:
        assert fragment in rendered
|
||
|
||
@pytest.mark.parametrize("func", ["apply", "applymap"])
def test_apply_applymap_non_unique_raises(df, func):
    """Check that computing ``apply``/``applymap`` styles raises ``KeyError``.

    ``func`` names the ``Styler`` method under test; the styling callable is
    chosen to match it (list-returning for ``apply``, string-returning
    otherwise).
    """
    # GH 41269

    def style_sequence(s):
        # One CSS string per element of the object handed to ``apply``.
        return ["color: red;"] * len(s)

    def style_value(v):
        # A single CSS string for the value handed to ``applymap``.
        return "color: red;"

    op = style_sequence if func == "apply" else style_value

    with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"):
        styled = getattr(df.style, func)(op)
        styled._compute()
|
||
|
||
def test_table_styles_dict_non_unique_index(styler):
    """Check dict-style ``set_table_styles`` with ``axis=1`` and a repeated key.

    The key ``"j"`` is passed once and the resulting ``table_styles`` are
    expected to hold one entry for ``row1`` and one for ``row2``.
    """
    keyed_styles = {"j": [{"selector": "td", "props": "a: v;"}]}
    result = styler.set_table_styles(keyed_styles, axis=1).table_styles

    expected = [
        {"selector": "td.row1", "props": [("a", "v")]},
        {"selector": "td.row2", "props": [("a", "v")]},
    ]
    assert result == expected
|
||
|
||
def test_table_styles_dict_non_unique_columns(styler):
    """Check dict-style ``set_table_styles`` with ``axis=0`` and a repeated key.

    The key ``"d"`` is passed once and the resulting ``table_styles`` are
    expected to hold one entry for ``col1`` and one for ``col2``.
    """
    keyed_styles = {"d": [{"selector": "td", "props": "a: v;"}]}
    result = styler.set_table_styles(keyed_styles, axis=0).table_styles

    expected = [
        {"selector": "td.col1", "props": [("a", "v")]},
        {"selector": "td.col2", "props": [("a", "v")]},
    ]
    assert result == expected
|
||
|
||
def test_tooltips_non_unique_raises(styler):
    """Check that ``set_tooltips`` rejects a ``ttips`` frame with repeated labels.

    A ``ttips`` frame with unique labels is accepted; repeated column labels
    or repeated index labels must each raise ``KeyError``.
    """

    def make_ttips(columns, index):
        # Same 2x2 string payload every time; only the labels vary.
        return DataFrame([["1", "2"], ["3", "4"]], columns=columns, index=index)

    # ttips has unique keys
    styler.set_tooltips(ttips=make_ttips(["c", "d"], ["a", "b"]))  # OK

    duplicated_label_cases = [
        (["c", "c"], ["a", "b"]),  # ttips has non-unique columns
        (["c", "d"], ["a", "a"]),  # ttips has non-unique index
    ]
    for columns, index in duplicated_label_cases:
        ttips = make_ttips(columns, index)
        with pytest.raises(
            KeyError, match="Tooltips render only if `ttips` has unique"
        ):
            styler.set_tooltips(ttips=ttips)
|
||
|
||
def test_set_td_classes_non_unique_raises(styler):
    """Check that ``set_td_classes`` rejects a ``classes`` frame with repeated labels.

    A ``classes`` frame with unique labels is accepted; repeated column labels
    or repeated index labels must each raise ``KeyError``.
    """

    def make_classes(columns, index):
        # Same 2x2 string payload every time; only the labels vary.
        return DataFrame([["1", "2"], ["3", "4"]], columns=columns, index=index)

    # classes has unique keys
    styler.set_td_classes(classes=make_classes(["c", "d"], ["a", "b"]))  # OK

    duplicated_label_cases = [
        (["c", "c"], ["a", "b"]),  # classes has non-unique columns
        (["c", "d"], ["a", "a"]),  # classes has non-unique index
    ]
    for columns, index in duplicated_label_cases:
        classes = make_classes(columns, index)
        with pytest.raises(
            KeyError, match="Classes render only if `classes` has unique"
        ):
            styler.set_td_classes(classes=classes)
|
||
|
||
def test_hide_columns_non_unique(styler):
    """Check that hiding the repeated label ``"d"`` hides both of its columns.

    Cells 1-3 of the first header row and of the first body row are
    inspected: the ``"c"`` cell must stay visible and both ``"d"`` cells must
    be hidden.
    """
    ctx = styler.hide_columns(["d"])._translate()

    # (cell position, expected header label, expected visibility)
    expectations = [(1, "c", True), (2, "d", False), (3, "d", False)]

    header_row = ctx["head"][0]
    for position, label, visible in expectations:
        assert header_row[position]["display_value"] == label
        assert header_row[position]["is_visible"] is visible

    first_body_row = ctx["body"][0]
    for position, _label, visible in expectations:
        assert first_body_row[position]["is_visible"] is visible
Uh oh!
There was an error while loading. Please reload this page.