Skip to content

Commit 397aefa

Browse files
committed
a bit better
1 parent 217acd7 commit 397aefa

File tree

1 file changed

+41
-5
lines changed

1 file changed

+41
-5
lines changed

docs/summary-table.md

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ SELECT a::int
2121
```
2222

2323
```sql echo
24-
FROM range(10, 22, 2);
24+
SELECT floor(sqrt(range)), count() FROM range(10, 2278, 2) GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 10;
2525
```
2626

2727
```sql echo
@@ -77,21 +77,56 @@ function summary({name, type, values}) {
7777
let chart;
7878

7979
// Count values, NaN, nulls, distinct
80-
// TODO use DuckDB?
80+
// TODO optimize with DuckDB?
8181
let max = -Infinity;
8282
let min = Infinity;
83+
let count = 0;
8384
let nulls = 0;
8485
const distinct = new Set();
8586
const capped = 100; // max number of distinct values to count
8687
for (const v of values) {
8788
if (v == null) {nulls++; continue;}
89+
count++;
8890
if (min > v) min = v; // note this works for strings too
8991
if (max < v) max = v;
9092
if (distinct.size <= capped && !distinct.has(v)) distinct.add(v);
9193
}
9294

93-
if (distinct.size <= 10 || type === "Utf8") {
94-
const stackOptions = (type === "Utf8") ? {order: "sum", reverse: true} : {order: "value"};
95+
// categorical
96+
if (type === "Utf8") {
97+
const stackOptions = {order: "sum", reverse: true};
98+
const counts = new Map();
99+
for (const v of values) {
100+
if (v == null) continue;
101+
if (counts.has(v)) counts.set(v, 1 + counts.get(v)); else counts.set(v, 1);
102+
}
103+
const topX = d3.sort(counts, ([, c]) => -c).slice(0, 10);
104+
const visible = new Set(topX.filter(([, c]) => c / count > 0.1).map(([key]) => key));
105+
// TODO:
106+
// - if the “others” group has a single value, use it
107+
// - if a category is already named "Others", use "…" instead
108+
// - separate the nulls
109+
110+
chart = Plot.plot({
111+
width,
112+
height,
113+
style: "overflow: visible;",
114+
x: {axis: null},
115+
y: {axis: null},
116+
marginLeft: 2,
117+
marginRight: 2,
118+
marginTop: 0,
119+
marginBottom: 13,
120+
marks: [
121+
Plot.barX(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count"}, {z: d => visible.has(d) ? d : "Others", insetRight: 1, fill: "var(--theme-foreground-focus)"}))),
122+
Plot.text(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count", text: "first"}, {text: d => visible.has(d) ? d : "Others", z: d => visible.has(d) ? d : "Others", fill: "var(--plot-background)"}))),
123+
]
124+
});
125+
}
126+
127+
// ordinal
128+
else if (distinct.size <= 10) {
129+
const stackOptions = {order: "z"};
95130
chart = Plot.plot({
96131
width,
97132
height,
@@ -108,6 +143,7 @@ function summary({name, type, values}) {
108143
]
109144
});
110145
}
146+
// temporal, quantitative
111147
else {
112148
const thresholds = Math.max(10, Math.min(50, d3.thresholdScott(values, min, max))); // TODO optimize thresholdScott
113149
chart = Plot.plot({
@@ -150,7 +186,7 @@ function summary({name, type, values}) {
150186
151187
<style>
152188
153-
table .type {font-size: smaller; font-weight: normal; height: 1.35em;}
189+
table .type {font-size: smaller; font-weight: normal; color: var(--theme-foreground-muted); height: 1.35em;}
154190
table .summary {font-size: smaller; font-weight: normal; height: 33px;}
155191
footer {font-family: var(--sans-serif); font-size: small; color: var(--theme-foreground-faint)}
156192

0 commit comments

Comments
 (0)