Skip to content

Commit aeb1295

Browse files
Merge pull request #313 from eutialia/feat/variable_layout
feat(eda.plot): Redesigned layout for plot(df, x)
2 parents: d878b85 + 04c7fd5; commit: aeb1295

File tree

16 files changed

+563
-396
lines changed

16 files changed

+563
-396
lines changed

assets/plot(df).gif

938 KB
Loading

assets/plot(df,x)_cat.gif

640 KB
Loading

assets/plot(df,x)_num.gif

1.14 MB
Loading

dataprep/eda/container.py

Lines changed: 35 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@
44

55
import sys
66
import webbrowser
7+
import random
78
from tempfile import NamedTemporaryFile
8-
from typing import List, Dict, Union, Tuple
9+
from typing import Any, Dict
910
from bokeh.io import output_notebook
1011
from bokeh.embed import components
11-
from bokeh.models import LayoutDOM
1212
from bokeh.resources import INLINE
1313
from jinja2 import Environment, PackageLoader
1414
from .utils import is_notebook
@@ -25,28 +25,39 @@ class Container:
2525
This class creates a customized Container object for the plot(df) function.
2626
"""
2727

28-
def __init__(
29-
self,
30-
to_render: Dict[
31-
str,
32-
Union[
33-
List[str],
34-
List[LayoutDOM],
35-
Tuple[Dict[str, str], Dict[str, str]],
36-
Dict[int, List[str]],
37-
],
38-
],
39-
) -> None:
40-
self.context = {
41-
"resources": INLINE.render(),
42-
"components": components(to_render["layout"]),
43-
"tabledata": to_render["tabledata"],
44-
"overview_insights": to_render["overview_insights"],
45-
"column_insights": to_render["column_insights"],
46-
"meta": to_render["meta"],
47-
"title": "DataPrep.EDA Report",
48-
}
49-
self.template_base = ENV_LOADER.get_template("base.html")
28+
def __init__(self, to_render: Dict[str, Any], visual_type: str,) -> None:
29+
if visual_type == "distribution_grid":
30+
self.context = {
31+
"resources": INLINE.render(),
32+
"components": components(to_render["layout"]),
33+
"tabledata": to_render["tabledata"],
34+
"overview_insights": to_render["overview_insights"],
35+
"column_insights": to_render["column_insights"],
36+
"meta": to_render["meta"],
37+
"title": "DataPrep.EDA Report",
38+
"rnd": random.randint(
39+
0, 99
40+
), # for multiple cells running in the same notebook
41+
}
42+
self.template_base = ENV_LOADER.get_template("grid_base.html")
43+
44+
elif "_column" in visual_type:
45+
# todo: param management
46+
to_render["meta"].insert(0, "Stats")
47+
self.context = {
48+
"resources": INLINE.render(),
49+
"tabledata": to_render["tabledata"],
50+
"insights": to_render["insights"],
51+
"components": components(to_render["layout"]),
52+
"meta": to_render["meta"],
53+
"title": "DataPrep.EDA Report",
54+
"rnd": random.randint(
55+
100, 999
56+
), # for multiple cells running in the same notebook
57+
}
58+
self.template_base = ENV_LOADER.get_template("univariate_base.html")
59+
else:
60+
raise TypeError(f"Unsupported Visual Type: {visual_type}.")
5061

5162
def save(self, filename: str) -> None:
5263
"""

dataprep/eda/create_report/formatter.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ def format_report(
6363

6464

6565
def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
66+
# pylint: disable=too-many-statements
6667
"""
6768
Format basic version.
6869
@@ -104,20 +105,23 @@ def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
104105
itmdt = Intermediate(
105106
col=col, data=data[col], visual_type="numerical_column"
106107
)
107-
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)
108+
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)["layout"]
108109
stats = format_num_stats(data[col])
109110
elif is_dtype(detect_dtype(df[col]), Nominal()):
110111
itmdt = Intermediate(
111112
col=col, data=data[col], visual_type="categorical_column"
112113
)
113-
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)
114+
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)["layout"]
114115
stats = format_cat_stats(
115116
data[col]["stats"], data[col]["len_stats"], data[col]["letter_stats"]
116117
)
117118
figs: List[Figure] = []
118-
for tab in rndrd.tabs[1:]:
119-
fig = tab.child.children[0]
120-
fig.title = Title(text=tab.title, align="center")
119+
for tab in rndrd:
120+
try:
121+
fig = tab.children[0]
122+
except AttributeError:
123+
fig = tab
124+
# fig.title = Title(text=tab.title, align="center")
121125
figs.append(fig)
122126
res["variables"][col] = {
123127
"tabledata": stats,

dataprep/eda/create_report/templates/styles.html

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@
160160
margin-left: auto;
161161
margin-right: auto;
162162
margin-bottom: 70px;
163-
max-width: 1200px;
163+
max-width: 1320px;
164164
}
165165

166166
.section-variable p {
@@ -377,7 +377,7 @@
377377
margin: unset !important;
378378
}
379379

380-
@media screen and (max-width: 1024px) {
380+
@media screen and (max-width: 1320px) {
381381
h1.tb-title {
382382
max-width: 850px;
383383
}
@@ -387,19 +387,19 @@
387387
}
388388

389389
.section {
390-
max-width: 850px;
390+
max-width: 975px;
391391
}
392392

393393
.var-container>.var-title {
394394
flex: 2 1 10%;
395395
}
396396

397397
.var-container>.tb-container {
398-
flex: 2 1 350px;
398+
flex: 2 1 400px;
399399
}
400400

401401
.var-toggle {
402-
width: 70px;
402+
width: 100px;
403403
}
404404

405405
.vp-plot-categorical {

dataprep/eda/create_report/templates/variables.html

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ <h2 class="tb-title">{{ key }}</h2>
2020
<div class="tb-container">
2121
<div class="tb-{{ value.col_type }}">
2222
<table class="rp-table">
23-
{% for h, d in value.tabledata[0].items() %}
23+
{% for h, d in value.tabledata['Overview'].items() %}
2424
<tr>
2525
<th>{{ h }}</th>
2626
<td>{{ d }}</td>
@@ -49,7 +49,7 @@ <h2 class="tb-title">{{ key }}</h2>
4949
<div>
5050
<h4 class="tb-title">Quantile Statistics</h3>
5151
<table class="rp-table">
52-
{% for h, d in value.tabledata[1].items() %}
52+
{% for h, d in value.tabledata['Quantile Statistics'].items() %}
5353
<tr>
5454
<th>{{ h }}</th>
5555
<td>{{ d }}</td>
@@ -60,7 +60,7 @@ <h4 class="tb-title">Quantile Statistics</h3>
6060
<div>
6161
<h4 class="tb-title">Descriptive Statistics</h3>
6262
<table class="rp-table">
63-
{% for h, d in value.tabledata[2].items() %}
63+
{% for h, d in value.tabledata['Descriptive Statistics'].items() %}
6464
<tr>
6565
<th>{{ h }}</th>
6666
<td>{{ d }}</td>
@@ -75,7 +75,7 @@ <h4 class="tb-title">Descriptive Statistics</h3>
7575
<div>
7676
<h4 class="tb-title">Length</h3>
7777
<table class="rp-table">
78-
{% for h, d in value.tabledata[1].items() %}
78+
{% for h, d in value.tabledata['Length'].items() %}
7979
<tr>
8080
<th>{{ h }}</th>
8181
<td>{{ d }}</td>
@@ -86,7 +86,7 @@ <h4 class="tb-title">Length</h3>
8686
<div>
8787
<h4 class="tb-title">Sample</h3>
8888
<table class="rp-table">
89-
{% for h, d in value.tabledata[2].items() %}
89+
{% for h, d in value.tabledata['Sample'].items() %}
9090
<tr>
9191
<th>{{ h }}</th>
9292
<td>{{ d }}</td>
@@ -97,7 +97,7 @@ <h4 class="tb-title">Sample</h3>
9797
<div>
9898
<h4 class="tb-title">Letter</h3>
9999
<table class="rp-table">
100-
{% for h, d in value.tabledata[3].items() %}
100+
{% for h, d in value.tabledata['Letter'].items() %}
101101
<tr>
102102
<th>{{ h }}</th>
103103
<td>{{ d }}</td>

dataprep/eda/distribution/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -169,7 +169,10 @@ def plot(
169169
dtype=dtype,
170170
)
171171
figure = render(intermediate, yscale=yscale, tile_size=tile_size)
172-
if intermediate.visual_type == "distribution_grid":
173-
return Container(figure)
172+
if (
173+
intermediate.visual_type == "distribution_grid"
174+
or "_column" in intermediate.visual_type
175+
):
176+
return Container(figure, intermediate.visual_type)
174177
else:
175178
return Report(figure)

dataprep/eda/distribution/compute/overview.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ def compute_overview(
121121
stats=ov_stats,
122122
column_insights=col_insights,
123123
overview_insights=_insight_pagination(ov_insights),
124+
ov_insights=ov_insights,
124125
visual_type="distribution_grid",
125126
)
126127

@@ -446,9 +447,9 @@ def _insight_pagination(ins: List[Dict[str, str]]) -> Dict[int, List[Dict[str, s
446447
# sort the insights based on the list ins_order
447448
ins.sort(key=lambda x: ins_order.index(list(x.keys())[0]))
448449
# paginate the sorted insights
449-
page_count = int(np.ceil(len(ins) / 11))
450+
page_count = int(np.ceil(len(ins) / 10))
450451
paginated_ins: Dict[int, List[Dict[str, str]]] = {}
451452
for i in range(1, page_count + 1):
452-
paginated_ins[i] = ins[(i - 1) * 11 : i * 11]
453+
paginated_ins[i] = ins[(i - 1) * 10 : i * 10]
453454

454455
return paginated_ins

0 commit comments

Comments
 (0)