Skip to content

Commit 9523a2c

Browse files
mkcor and Shammamah Hossain authored
Support data input as DataFrame in Clustergram. (#478)
* Write test for dataframe input of Clustergram
* Support data input as DataFrame in Clustergram
* Add integration test for Clustergram reading in dataframes
* Lint and standardize imports
* Log enhancement for upcoming release
* Update version number in package.
* Update autogenerated files.
* Update CHANGELOG with release date.

Co-authored-by: Shammamah Hossain <[email protected]>
1 parent 431f745 commit 9523a2c

File tree

7 files changed

+79
-24
lines changed

7 files changed

+79
-24
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## [0.4.7] - 2020-02-21
4+
### Added
5+
* [#478](https://github.com/plotly/dash-bio/pull/478) Added support for
6+
DataFrame as Clustergram input.
7+
38
## [0.4.6] - 2020-01-07
49
### Fixed
510
* [#458](https://github.com/plotly/dash-bio/pull/458) Fixed reordering bug of

dash_bio/bundle.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dash_bio/component_factory/_clustergram.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from random import shuffle
33

44
import numpy as np
5+
import pandas as pd
56
import scipy
67
import scipy.cluster.hierarchy as sch
78
import scipy.spatial as scs
@@ -13,7 +14,7 @@
1314

1415
# pylint: disable=assignment-from-no-return, no-self-use
1516
def Clustergram(
16-
data=None,
17+
data,
1718
generate_curves_dict=False,
1819
return_computed_traces=False,
1920
computed_traces=None,
@@ -49,7 +50,8 @@ def Clustergram(
4950
5051
Keyword arguments:
5152
52-
- data (ndarray; required): Matrix of observations as array of arrays
53+
- data (2D array-like; required): Matrix or table of observations (dropping
54+
columns of non-numeric dtype).
5355
- generate_curves_dict (bool; default False): Whether or not to return a
5456
dictionary containing information about the cluster number
5557
associated with each curve number in the graph. (May be useful
@@ -200,7 +202,7 @@ class _Clustergram:
200202

201203
def __init__(
202204
self,
203-
data=None,
205+
data,
204206
row_labels=None,
205207
column_labels=None,
206208
hidden_labels=None,
@@ -234,6 +236,9 @@ def __init__(
234236
See docstring of the `Clustergram` function, where the same keyword arguments (and a couple
235237
of other ones) are documented.
236238
"""
239+
if isinstance(data, pd.DataFrame):
240+
data = data.select_dtypes('number')
241+
data = data.values
237242
if hidden_labels is None:
238243
hidden_labels = []
239244
if color_threshold is None:

dash_bio/package-info.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"name": "dash_bio", "version": "0.4.6", "author": "The Plotly Team <[email protected]>"}
1+
{"name": "dash_bio", "version": "0.4.7", "author": "The Plotly Team <[email protected]>"}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "dash-bio",
3-
"version": "0.4.6",
3+
"version": "0.4.7",
44
"description": "Dash components for bioinformatics",
55
"repository": {
66
"type": "git",

tests/integration/test_clustergram.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
import pandas
21
import json
2+
import pandas as pd
33

44
import dash
5-
import dash_bio
65
import dash_html_components as html
6+
import dash_bio
77

88
from common_features import nested_component_layout, \
99
nested_component_app_callback
1010

1111
_data = None
1212

13-
_mtcars_data = pandas.read_csv(
13+
_mtcars_data = pd.read_csv(
1414
'tests/dashbio_demos/dash-clustergram/data/mtcars.tsv',
1515
delimiter='\t',
1616
skiprows=4
@@ -185,3 +185,30 @@ def test_dbcl005_row_annotations(dash_duo):
185185
# the annotation is the correct color
186186
dash_duo.wait_for_style_to_equal(
187187
'g.subplot.x6y6 g.plot g.lines > path', 'stroke', 'rgb(248, 62, 199)')
188+
189+
190+
def test_dbcl006_df_input_row_cluster(dash_duo):
191+
192+
app = dash.Dash(__name__)
193+
194+
# run the same test as dbcl002 (row clustering) where table of
195+
# observations (data argument) is left as a DataFrame
196+
assert isinstance(_mtcars_data, pd.DataFrame)
197+
app.layout = html.Div(nested_component_layout(
198+
dash_bio.Clustergram(
199+
data=_mtcars_data
200+
)
201+
))
202+
203+
nested_component_app_callback(
204+
app,
205+
dash_duo,
206+
component=dash_bio.Clustergram,
207+
component_data=_data,
208+
test_prop_name='cluster',
209+
test_prop_value='row',
210+
prop_value_type='string'
211+
)
212+
213+
assert len(dash_duo.find_elements('g.subplot.x2y2')) == 0
214+
assert len(dash_duo.find_elements('g.subplot.x4y4')) == 1

tests/unit/test_clustergram.py

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,51 @@
11
import numpy as np
2+
import pandas as pd
23

34
from dash_bio import Clustergram
45

6+
DATA = np.array(
7+
[[1, 1, 1, 1],
8+
[3, 3, 3, 3],
9+
[1, 1, 1, 1],
10+
[3, 3, 3, 3],
11+
[1, 1, 1, 1],
12+
[3, 3, 3, 3]]
13+
)
14+
CLUSTERED_DATA = np.array(
15+
[[1, 1, 1, 1],
16+
[1, 1, 1, 1],
17+
[1, 1, 1, 1],
18+
[3, 3, 3, 3],
19+
[3, 3, 3, 3],
20+
[3, 3, 3, 3]]
21+
)
22+
523

624
def test_cluster_rows():
725
"""Test that rows of 1's and 3's are properly clustered."""
8-
data = np.array(
9-
[[1, 1, 1, 1],
10-
[3, 3, 3, 3],
11-
[1, 1, 1, 1],
12-
[3, 3, 3, 3],
13-
[1, 1, 1, 1],
14-
[3, 3, 3, 3]]
15-
)
1626

27+
data = DATA
1728
_, _, curves_dict = Clustergram(
1829
data,
1930
generate_curves_dict=True,
2031
return_computed_traces=True,
2132
center_values=False
2233
)
34+
clustered_data = CLUSTERED_DATA
35+
36+
assert np.array_equal(curves_dict['heatmap']['z'], clustered_data)
37+
38+
39+
def test_read_dataframe():
40+
"""Test that input data can be in a dataframe."""
2341

24-
clustered_data = np.array(
25-
[[1, 1, 1, 1],
26-
[1, 1, 1, 1],
27-
[1, 1, 1, 1],
28-
[3, 3, 3, 3],
29-
[3, 3, 3, 3],
30-
[3, 3, 3, 3]]
42+
data = pd.DataFrame(DATA)
43+
_, _, curves_dict = Clustergram(
44+
data,
45+
generate_curves_dict=True,
46+
return_computed_traces=True,
47+
center_values=False
3148
)
49+
clustered_data = CLUSTERED_DATA
3250

3351
assert np.array_equal(curves_dict['heatmap']['z'], clustered_data)

0 commit comments

Comments
 (0)