From ad3017eae68c2442c590d397930b1d9e6fc7612a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 20 Mar 2024 17:52:31 +0100 Subject: [PATCH 01/21] prototyping in userland --- docs/summary-table.md | 114 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 docs/summary-table.md diff --git a/docs/summary-table.md b/docs/summary-table.md new file mode 100644 index 000000000..79c4961a0 --- /dev/null +++ b/docs/summary-table.md @@ -0,0 +1,114 @@ +# SQL summary table ([#23](https://github.com/observablehq/framework/issues/23)) + +```sql echo +SELECT * FROM aapl; +``` + +```sql echo +SELECT * FROM gaia; +``` + +```sql echo +SELECT * FROM penguins; +``` + +```js echo +const sql = DuckDBClient.sql({aapl, penguins, gaia: FileAttachment("/lib/gaia-sample.parquet")}); +``` + +```js echo +import * as _Inputs from "npm:@observablehq/inputs" +import * as Arrow from "npm:apache-arrow"; +import * as d3 from "npm:d3"; +import {html} from "npm:htl"; + +width; // refresh when resized + +const Inputs = ({..._Inputs, table}) + +function table(data, options = {}) { + if (!data) return data; + + const container = document.createElement("div"); + container.append(_Inputs.table(data, options)); + + // Duck typing Arrow table + if (!Array.isArray(data?.schema?.fields)) return container; + + // Get the fields as described by Arrow, in the order given (potentially) by the options. + const fields = (options.columns?.map(k => data.schema.find(({name}) => name === k)) ?? data.schema.fields).map(({name, type}) => ({name: String(name), type: String(type)})); + + const th = d3.select(container).select("thead").selectAll("th").data([{}, ...fields]); + th.append("div").classed("type", true).html(({type}) => type); + const summaries = th.append("div").classed("summary", true); + const footer = html``; + container.appendChild(footer); + + requestAnimationFrame(() => summaries + .filter(({type}) => type) + .append(function({name, type}) { + return summary(data.getChild(name), type, this.getBoundingClientRect()); + }) + ); + return container; +} + +function summary(values, type, {width = 80, height = 33}) { + let chart; + if (type.startsWith("Float") || type.startsWith("Date")) { + chart = Plot.plot({ + width, + height, + style: "overflow: visible;", + x: {round: true}, + y: {axis: null}, + marginLeft: 2, + marginRight: 2, + marginTop: 0, + marginBottom: 13, + marks: [ + Plot.rectY(values, Plot.binX(undefined, {fill: "var(--theme-foreground-focus)", inset: -.3})), + Plot.axisX({tickSpacing: 41, tickSize: 3, tickPadding: 2, fontSize: 8}), + ] + }); + + // restore insets where possible + const rects = chart.querySelectorAll("rect"); + if (rects.length < 100) { + for (const rect of rects) { + rect.setAttribute("x", Math.floor(rect.getAttribute("x"))); + rect.setAttribute("width", Math.max(1, Math.floor(rect.getAttribute("width")) - 1)); + } + } + } + else if (type === "Utf8") { + chart = Plot.plot({ + width, + height, + style: "overflow: visible;", + x: {axis: null}, + y: {axis: null}, + marginLeft: 2, + marginRight: 2, + marginTop: 0, + marginBottom: 13, + marks: [ + Plot.barX(values, Plot.groupZ({x: "count"}, {z: Plot.identity, insetRight: 1, fill: "var(--theme-foreground-focus)"})), + Plot.text(values, Plot.stackX(Plot.groupZ({x: "count", text: "first"}, {z: Plot.identity, fill: "var(--plot-background)"}))), + ] + }); + } + return chart ?? html`Unknown type ${type}`; +} +``` + + From 237f9eece8b1914001aadf1cd70ddb7e1cab2e0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 25 Mar 2024 15:38:46 +0100 Subject: [PATCH 02/21] more examples, better presentation --- docs/summary-table.md | 99 +++++++++++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 28 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index 79c4961a0..07b5d81f8 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -12,6 +12,26 @@ SELECT * FROM gaia; SELECT * FROM penguins; ``` +```sql echo +SELECT a::int + , b::int + , (a * b)::int as p + FROM generate_series(1, 9) as s(a) + , generate_series(1, 9) as t(b); +``` + +```sql echo +FROM range(10, 22, 2); +``` + +```sql echo +SELECT range * pi() as pi FROM range(10); +``` + +```sql echo +SELECT cos(range * pi() / 10) as x, sin(range * pi() / 10) as y FROM range(0, 20, 1); +``` + ```js echo const sql = DuckDBClient.sql({aapl, penguins, gaia: FileAttachment("/lib/gaia-sample.parquet")}); ``` @@ -36,7 +56,7 @@ function table(data, options = {}) { if (!Array.isArray(data?.schema?.fields)) return container; // Get the fields as described by Arrow, in the order given (potentially) by the options. - const fields = (options.columns?.map(k => data.schema.find(({name}) => name === k)) ?? data.schema.fields).map(({name, type}) => ({name: String(name), type: String(type)})); + const fields = (options.columns?.map(k => data.schema.find(({name}) => name === k)) ?? data.schema.fields).map(({name, type}) => ({name: String(name), type: String(type), values: data.getChild(name)})); const th = d3.select(container).select("thead").selectAll("th").data([{}, ...fields]); th.append("div").classed("type", true).html(({type}) => type); @@ -47,61 +67,84 @@ function table(data, options = {}) { `; container.appendChild(footer); - requestAnimationFrame(() => summaries - .filter(({type}) => type) - .append(function({name, type}) { - return summary(data.getChild(name), type, this.getBoundingClientRect()); - }) - ); + requestAnimationFrame(() => summaries.filter(({type}) => type).append(summary)); return container; } -function summary(values, type, {width = 80, height = 33}) { +function summary({name, type, values}) { + const {width: w, height} = this.getBoundingClientRect(); + const width = Math.min(200, (w ?? 80) - 10); let chart; - if (type.startsWith("Float") || type.startsWith("Date")) { + + // Count values, NaN, nulls, distinct + // TODO use DuckdB? + let max = -Infinity; + let min = Infinity; + let nulls = 0; + const distinct = new Set(); + const capped = 100; // max number of distinct values to count + for (const v of values) { + if (v == null) {nulls++; continue;} + if (min > v) min = v; // note this works for strings too + if (max < v) max = v; + if (distinct.size <= capped && !distinct.has(v)) distinct.add(v); + } + + if (distinct.size <= 10 || type === "Utf8") { + const stackOptions = (type === "Utf8") ? {order: "sum", reverse: true} : {order: "value"}; chart = Plot.plot({ width, height, style: "overflow: visible;", - x: {round: true}, + x: {axis: null}, y: {axis: null}, marginLeft: 2, marginRight: 2, marginTop: 0, marginBottom: 13, marks: [ - Plot.rectY(values, Plot.binX(undefined, {fill: "var(--theme-foreground-focus)", inset: -.3})), - Plot.axisX({tickSpacing: 41, tickSize: 3, tickPadding: 2, fontSize: 8}), + Plot.barX(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count"}, {z: Plot.identity, insetRight: 1, fill: "var(--theme-foreground-focus)"}))), + Plot.text(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count", text: "first"}, {z: Plot.identity, fill: "var(--plot-background)"}))), ] }); - - // restore insets where possible - const rects = chart.querySelectorAll("rect"); - if (rects.length < 100) { - for (const rect of rects) { - rect.setAttribute("x", Math.floor(rect.getAttribute("x"))); - rect.setAttribute("width", Math.max(1, Math.floor(rect.getAttribute("width")) - 1)); - } - } } - else if (type === "Utf8") { + else { + const thresholds = Math.max(10, Math.min(50, d3.thresholdScott(values, min, max))); // TODO optimize thresholdScott chart = Plot.plot({ width, height, style: "overflow: visible;", - x: {axis: null}, + x: { + round: true, + nice: true + }, y: {axis: null}, - marginLeft: 2, - marginRight: 2, + marginLeft: 9, + marginRight: 9, marginTop: 0, marginBottom: 13, marks: [ - Plot.barX(values, Plot.groupZ({x: "count"}, {z: Plot.identity, insetRight: 1, fill: "var(--theme-foreground-focus)"})), - Plot.text(values, Plot.stackX(Plot.groupZ({x: "count", text: "first"}, {z: Plot.identity, fill: "var(--plot-background)"}))), + thresholds > 20 ? + Plot.areaY(values, Plot.binX(undefined, { + fill: "var(--theme-foreground-focus)", + thresholds + })) : + Plot.rectY(values, Plot.binX(undefined, { + fill: "var(--theme-foreground-focus)", + thresholds, + inset: 0, + insetRight: 1, + })), + min * max <= 0 ? Plot.ruleX([0]) : [], + Plot.ruleY([0]), + Plot.axisX({tickSpacing: 41, tickSize: 3, tickPadding: 2, fontSize: 8, ...(!type.startsWith("Date") && Math.max(Math.abs(min), Math.abs(max)) >= 1e5 && {tickFormat: "s"})}), ] }); } - return chart ?? html`Unknown type ${type}`; + return chart ? html`
${chart}` : html`Unknown type ${type}`; } ``` From c8d37a719fe9ee5afa5c6d8b549df090ee1620ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 25 Mar 2024 16:26:40 +0100 Subject: [PATCH 03/21] a bit better --- docs/summary-table.md | 46 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index 07b5d81f8..ce1e072c9 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -21,7 +21,7 @@ SELECT a::int ``` ```sql echo -FROM range(10, 22, 2); +SELECT floor(sqrt(range)), count() FROM range(10, 2278, 2) GROUP BY 1 ORDER BY 2 DESC, 1 DESC LIMIT 10; ``` ```sql echo @@ -77,21 +77,56 @@ function summary({name, type, values}) { let chart; // Count values, NaN, nulls, distinct - // TODO use DuckdB? + // TODO optimize with DuckdB? let max = -Infinity; let min = Infinity; + let count = 0; let nulls = 0; const distinct = new Set(); const capped = 100; // max number of distinct values to count for (const v of values) { if (v == null) {nulls++; continue;} + count++; if (min > v) min = v; // note this works for strings too if (max < v) max = v; if (distinct.size <= capped && !distinct.has(v)) distinct.add(v); } - if (distinct.size <= 10 || type === "Utf8") { - const stackOptions = (type === "Utf8") ? {order: "sum", reverse: true} : {order: "value"}; + // categorical + if (type === "Utf8") { + const stackOptions = {order: "sum", reverse: true}; + const counts = new Map(); + for (const v of values) { + if (v == null) continue; + if (counts.has(v)) counts.set(v, 1 + counts.get(v)); else counts.set(v, 1); + } + const topX = d3.sort(counts, ([, c]) => -c).slice(0, 10); + const visible = new Set(topX.filter(([, c]) => c / count > 0.1).map(([key]) => key)); + // TODO: + // - if the “others” group has a single value, use it + // - if a category is already named "Others", use "…" instead + // - separate the nulls + + chart = Plot.plot({ + width, + height, + style: "overflow: visible;", + x: {axis: null}, + y: {axis: null}, + marginLeft: 2, + marginRight: 2, + marginTop: 0, + marginBottom: 13, + marks: [ + Plot.barX(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count"}, {z: d => visible.has(d) ? d : "Others", insetRight: 1, fill: "var(--theme-foreground-focus)"}))), + Plot.text(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count", text: "first"}, {text: d => visible.has(d) ? d : "Others", z: d => visible.has(d) ? d : "Others", fill: "var(--plot-background)"}))), + ] + }); + } + + // ordinal + else if (distinct.size <= 10) { + const stackOptions = {order: "z"}; chart = Plot.plot({ width, height, @@ -108,6 +143,7 @@ function summary({name, type, values}) { ] }); } + // temporal, quantitative else { const thresholds = Math.max(10, Math.min(50, d3.thresholdScott(values, min, max))); // TODO optimize thresholdScott chart = Plot.plot({ @@ -150,7 +186,7 @@ function summary({name, type, values}) { From 8dd116c173c0ff9d429d87eb7414caf9b0dcc513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 1 Apr 2024 17:16:30 +0200 Subject: [PATCH 07/21] filters (just one for now: search) --- docs/summary-table.md | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index cd38dc8a1..95ac4350b 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -48,7 +48,7 @@ const Inputs = ({..._Inputs, table}) function table(data, options = {}) { if (!data) return data; - const table = _Inputs.table(data, options); + let table = _Inputs.table(data, options); // Duck typing Arrow table if (!Array.isArray(data?.schema?.fields)) return container; @@ -66,16 +66,44 @@ function table(data, options = {}) { const th = d3.select(container).select("thead").selectAll("th").data([{}, ...fields]); th.append("div").classed("type", true).html(({type}) => type); const summaries = th.append("div").classed("summary", true); + + const textFields = fields.filter(({type}) => type === "Utf8"); const footer = html`
-
+ ${textFields.length ? html`
` : ""}
${data.numRows.toLocaleString("en-US")} rows
`; container.appendChild(footer); + const filters = new Map(); + requestAnimationFrame(() => { for (const s of summaries.filter(({type}) => type)) summary(s); }); return container; + + function refresh() { + const index0 = d3.range(data.length ?? data.numRows); + let index = index0; + for (const [, f] of filters) index = index.filter(f); + table.replaceWith(table = _Inputs.table(index === index0 ? data : take(data, index), options)) + } + function take(data, index) { + return Array.from(index, (i) => data.get(i)); + } + + function search() { + const value = this.value; + filters.delete("search"); + if (value) { + try { + const re = new RegExp(`(^|\b)${value}`, "ui"); + filters.set("search", (i) => textFields.some(({values}) => re.test(values.get(i)))); + } catch(error) { + console.warn(error); + } + } + refresh(); + } } async function summary(div) { From 9246ffcacfe864d67c905f217a335b9e174e8112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 1 Apr 2024 17:56:56 +0200 Subject: [PATCH 08/21] temporary (dirty) fix for filtering proper fix will need to rewrite the table --- docs/summary-table.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index 95ac4350b..40fb0f799 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -79,14 +79,26 @@ function table(data, options = {}) { requestAnimationFrame(() => { for (const s of summaries.filter(({type}) => type)) summary(s); }); + + // save table headers for the dirty copy below + const thtype = [...d3.select(table).selectAll("th :nth-child(2)")] + const thsummary = [...d3.select(table).selectAll("th :nth-child(3)")] return container; function refresh() { const index0 = d3.range(data.length ?? data.numRows); let index = index0; for (const [, f] of filters) index = index.filter(f); - table.replaceWith(table = _Inputs.table(index === index0 ? data : take(data, index), options)) + + // TODO: make a fork of Inputs.table that works with index + // In the meantime, here's a very dirty approach + const _data = index === index0 ? data : take(data, index); + table.replaceWith(table = _Inputs.table(_data, options)); + const th = d3.select(table).selectAll("th"); + th.append((d, i) => thtype[i]); + th.append((d, i) => thsummary[i]); } + function take(data, index) { return Array.from(index, (i) => data.get(i)); } @@ -97,7 +109,8 @@ function table(data, options = {}) { if (value) { try { const re = new RegExp(`(^|\b)${value}`, "ui"); - filters.set("search", (i) => textFields.some(({values}) => re.test(values.get(i)))); + let tmp; + filters.set("search", (i) => textFields.some(({values}) => ((tmp = values.get(i)) && re.test(tmp)))); } catch(error) { console.warn(error); } From 691ac7afaecab850a7c3afdb29bf4894abad6ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 1 Apr 2024 19:46:54 +0200 Subject: [PATCH 09/21] filter by category --- docs/summary-table.md | 48 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index 40fb0f799..b005b213c 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -77,7 +77,7 @@ function table(data, options = {}) { const filters = new Map(); requestAnimationFrame(() => { - for (const s of summaries.filter(({type}) => type)) summary(s); + for (const s of summaries.filter(({type}) => type)) summary(s, filters, refresh); }); // save table headers for the dirty copy below @@ -112,6 +112,7 @@ function table(data, options = {}) { let tmp; filters.set("search", (i) => textFields.some(({values}) => ((tmp = values.get(i)) && re.test(tmp)))); } catch(error) { + // malformed RegExp: surface the error? or ignore and treat as string? console.warn(error); } } @@ -119,7 +120,7 @@ function table(data, options = {}) { } } -async function summary(div) { +async function summary(div, filters, refresh) { const {name, type, values} = d3.select(div).datum(); const {width: w, height} = div.getBoundingClientRect(); const width = Math.min(200, (w ?? 80)); @@ -145,16 +146,24 @@ async function summary(div) { if (type === "Utf8") { const stackOptions = {order: "sum", reverse: true}; const counts = new Map(); + let nulls = 0; for (const v of values) { - if (v == null) continue; + if (v == null) {nulls++; continue;} if (counts.has(v)) counts.set(v, 1 + counts.get(v)); else counts.set(v, 1); } const topX = d3.sort(counts, ([, c]) => -c).slice(0, 10); - const visible = new Set(topX.filter(([, c]) => c / count > 0.1).map(([key]) => key)); + const visible = new Map(topX.filter(([, c]) => c / count > 0.3)); + const others = d3.sum(counts, ([key, c]) => visible.has(key) ? 0 : c); + // TODO: // - if the “others” group has a single value, use it // - if a category is already named "Others", use "…" instead - // - separate the nulls + + const bars = [...visible]; + const Other = {toString() {return "Other"}}; + const Null = {toString() {return "null"}}; + if (others) bars.push([Other, others]); + if (nulls) bars.push([Null, nulls]); chart = Plot.plot({ width, @@ -167,10 +176,34 @@ async function summary(div) { marginTop: 0, marginBottom: 13, marks: [ - Plot.barX(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count"}, {z: d => visible.has(d) ? d : "Others", insetRight: 1, fill: "var(--theme-foreground-focus)"}))), - Plot.text(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count", text: "first"}, {text: d => visible.has(d) ? d : "Others", z: d => visible.has(d) ? d : "Others", fill: "var(--plot-background)"}))), + Plot.barX(bars, {x: "1", insetRight: 1, fill: ([x]) => typeof x === "string" ? "var(--theme-foreground-focus)" : "gray"}), + Plot.text(bars, Plot.stackX({text: "0", x: "1", fill: "var(--plot-background)", pointerEvents: "none"})), ] }); + + let currentMode; + const buttons = d3.select(chart).selectAll("rect").on("click", function(event) { + const mode = bars[this.__data__][0]; + if (filters.has(name) && currentMode === mode) { + filters.delete(name); + currentMode = undefined; + d3.select(this).classed("selected", false); + } + else { + if (mode === Null) { + filters.set(name, (i) => values.get(i) == null); + } else if (mode === Other) { + filters.set(name, (i) => { + const v = values.get(i); + return v != null && !visible.has(v); + }); + } else filters.set(name, (i) => values.get(i) === mode); + currentMode = mode; + d3.select(chart).selectAll("rect").classed("selected", false); + d3.select(this).classed("selected", true); + } + refresh(); + }); } // ordinal @@ -242,5 +275,6 @@ async function summary(div) { .summary-table table .type {font-size: smaller; font-weight: normal; color: var(--theme-foreground-muted); height: 1.35em;} .summary-table table .summary {font-size: smaller; font-weight: normal; height: 33px;} .summary-table footer {font-family: var(--sans-serif); font-size: small; color: var(--theme-foreground-faint)} + .summary-table rect.selected { fill: orange; } From f62a76d14662b844913758f0d574a78311fda61f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 1 Apr 2024 19:51:09 +0200 Subject: [PATCH 10/21] tally --- docs/summary-table.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index b005b213c..1eff978d3 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -68,9 +68,10 @@ function table(data, options = {}) { const summaries = th.append("div").classed("summary", true); const textFields = fields.filter(({type}) => type === "Utf8"); + const tally = html`
${data.numRows.toLocaleString("en-US")} rows
`; const footer = html`
${textFields.length ? html`
` : ""} -
${data.numRows.toLocaleString("en-US")} rows
+ ${tally}
`; container.appendChild(footer); @@ -97,6 +98,8 @@ function table(data, options = {}) { const th = d3.select(table).selectAll("th"); th.append((d, i) => thtype[i]); th.append((d, i) => thsummary[i]); + + tally.innerHTML = index === index0 ? `${index.length} rows` : `${index.length} / ${index0.length}`; } function take(data, index) { From 16d69478b5435468dded667198b35f39dba209fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 1 Apr 2024 20:05:03 +0200 Subject: [PATCH 11/21] clean up todos --- docs/summary-table.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index 1eff978d3..dc8a78eae 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -148,23 +148,22 @@ async function summary(div, filters, refresh) { // categorical if (type === "Utf8") { const stackOptions = {order: "sum", reverse: true}; - const counts = new Map(); + let counts = new Map(); let nulls = 0; for (const v of values) { if (v == null) {nulls++; continue;} if (counts.has(v)) counts.set(v, 1 + counts.get(v)); else counts.set(v, 1); } - const topX = d3.sort(counts, ([, c]) => -c).slice(0, 10); - const visible = new Map(topX.filter(([, c]) => c / count > 0.3)); + counts = d3.sort(counts, ([, c]) => -c); + const topX = counts.slice(0, 10); + let visible = new Map(topX.filter(([, c]) => c / count > 0.07)); + if (counts.length === visible.size + 1) visible = new Map(counts); // if the “others” group has a single value, use it const others = d3.sum(counts, ([key, c]) => visible.has(key) ? 0 : c); - // TODO: - // - if the “others” group has a single value, use it - // - if a category is already named "Others", use "…" instead - const bars = [...visible]; - const Other = {toString() {return "Other"}}; - const Null = {toString() {return "null"}}; + + const Other = {toString() {return "…"}} + const Null = {toString() {return "ø"}}; if (others) bars.push([Other, others]); if (nulls) bars.push([Null, nulls]); From e33a364f4e44938012c67313a72a9d090cdf22f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 1 Apr 2024 21:10:19 +0200 Subject: [PATCH 12/21] ordinal --- docs/summary-table.md | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index dc8a78eae..5ca583a31 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -145,9 +145,9 @@ async function summary(div, filters, refresh) { if (distinct.size <= capped && !distinct.has(v)) distinct.add(v); } - // categorical - if (type === "Utf8") { - const stackOptions = {order: "sum", reverse: true}; + const categorical = type === "Utf8"; + const ordinal = !categorical && distinct.size <= 10; + if (categorical || ordinal) { let counts = new Map(); let nulls = 0; for (const v of values) { @@ -161,6 +161,7 @@ async function summary(div, filters, refresh) { const others = d3.sum(counts, ([key, c]) => visible.has(key) ? 0 : c); const bars = [...visible]; + if (ordinal) bars.sort(([a], [b]) => +a - +b); const Other = {toString() {return "…"}} const Null = {toString() {return "ø"}}; @@ -178,7 +179,7 @@ async function summary(div, filters, refresh) { marginTop: 0, marginBottom: 13, marks: [ - Plot.barX(bars, {x: "1", insetRight: 1, fill: ([x]) => typeof x === "string" ? "var(--theme-foreground-focus)" : "gray"}), + Plot.barX(bars, {x: "1", insetRight: 1, fill: ([x]) => typeof x === "object" ? "gray" : "var(--theme-foreground-focus)"}), Plot.text(bars, Plot.stackX({text: "0", x: "1", fill: "var(--plot-background)", pointerEvents: "none"})), ] }); @@ -208,25 +209,6 @@ async function summary(div, filters, refresh) { }); } - // ordinal - else if (distinct.size <= 10) { - const stackOptions = {order: "z"}; - chart = Plot.plot({ - width, - height, - style: "overflow: visible;", - x: {axis: null}, - y: {axis: null}, - marginLeft: 2, - marginRight: 2, - marginTop: 0, - marginBottom: 13, - marks: [ - Plot.barX(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count"}, {z: Plot.identity, insetRight: 1, fill: "var(--theme-foreground-focus)"}))), - Plot.text(values, Plot.stackX(stackOptions, Plot.groupZ({x: "count", text: "first"}, {z: Plot.identity, fill: "var(--plot-background)"}))), - ] - }); - } // temporal, quantitative else { const niceK = 5; From b1177f58633a96258628c40a7592925657dcc9bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 2 Apr 2024 14:10:42 +0200 Subject: [PATCH 13/21] brush --- docs/summary-table.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/summary-table.md b/docs/summary-table.md index 5ca583a31..bb1c8eefb 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -95,6 +95,9 @@ function table(data, options = {}) { // In the meantime, here's a very dirty approach const _data = index === index0 ? data : take(data, index); table.replaceWith(table = _Inputs.table(_data, options)); + d3.select(table) + .style("min-width", `${120 * fields.length}px`) + .style("max-width", `${280 * fields.length}px`); const th = d3.select(table).selectAll("th"); th.append((d, i) => thtype[i]); th.append((d, i) => thsummary[i]); @@ -245,6 +248,18 @@ async function summary(div, filters, refresh) { Plot.axisX(ticks, {tickSize: 3, tickPadding: 2, fontSize: 8, ...(!isDate && {tickFormat: "s"})}), ] }); + + const X = Array.from(values, chart.scale("x").apply); + const brush = d3.brushX() + .on("end", refresh) + .on("brush", ({selection}) => { + if (selection) { + const [min, max] = selection; + filters.set(name, (i) => min <= X[i] && X[i] <= max); + } else filters.delete(name); + refresh(); // TODO debounce + }); + d3.select(chart).call(brush); } div.append(chart ? html`
Date: Tue, 2 Apr 2024 14:32:04 +0200 Subject: [PATCH 14/21] fixes, zero cross-results --- docs/summary-table.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/summary-table.md b/docs/summary-table.md index bb1c8eefb..a602410d7 100644 --- a/docs/summary-table.md +++ b/docs/summary-table.md @@ -56,6 +56,8 @@ function table(data, options = {}) { // Get the fields as described by Arrow, in the order given (potentially) by the options. const fields = (options.columns?.map(k => data.schema.find(({name}) => name === k)) ?? data.schema.fields).map(({name, type}) => ({name: String(name), type: String(type), values: data.getChild(name)})); + options.columns = fields.map(({name}) => name); + const container = document.createElement("div"); container.append(table); container.setAttribute("class", "summary-table"); @@ -68,7 +70,7 @@ function table(data, options = {}) { const summaries = th.append("div").classed("summary", true); const textFields = fields.filter(({type}) => type === "Utf8"); - const tally = html`
${data.numRows.toLocaleString("en-US")} rows
`; + const tally = html`
${data.numRows.toLocaleString("en-US")} rows
`; const footer = html`