@@ -21,7 +21,7 @@ SELECT a::int
21
21
```
22
22
23
23
``` sql echo
24
- FROM range(10 , 22 , 2 );
24
+ SELECT floor(sqrt(range)), count () FROM range(10 , 2278 , 2 ) GROUP BY 1 ORDER BY 2 DESC , 1 DESC LIMIT 10 ;
25
25
```
26
26
27
27
``` sql echo
@@ -77,21 +77,56 @@ function summary({name, type, values}) {
77
77
let chart;
78
78
79
79
// Count values, NaN, nulls, distinct
80
- // TODO use DuckdB?
80
+ // TODO optimize with DuckDB?
81
81
let max = - Infinity ;
82
82
let min = Infinity ;
83
+ let count = 0 ;
83
84
let nulls = 0 ;
84
85
const distinct = new Set ();
85
86
const capped = 100 ; // max number of distinct values to count
86
87
for (const v of values) {
87
88
if (v == null ) {nulls++ ; continue ;}
89
+ count++ ;
88
90
if (min > v) min = v; // note this works for strings too
89
91
if (max < v) max = v;
90
92
if (distinct .size <= capped && ! distinct .has (v)) distinct .add (v);
91
93
}
92
94
93
- if (distinct .size <= 10 || type === " Utf8" ) {
94
- const stackOptions = (type === " Utf8" ) ? {order: " sum" , reverse: true } : {order: " value" };
95
+ // categorical
96
+ if (type === " Utf8" ) {
97
+ const stackOptions = {order: " sum" , reverse: true };
98
+ const counts = new Map ();
99
+ for (const v of values) {
100
+ if (v == null ) continue ;
101
+ if (counts .has (v)) counts .set (v, 1 + counts .get (v)); else counts .set (v, 1 );
102
+ }
103
+ const topX = d3 .sort (counts, ([, c ]) => - c).slice (0 , 10 );
104
+ const visible = new Set (topX .filter (([, c ]) => c / count > 0.1 ).map (([key ]) => key));
105
+ // TODO:
106
+ // - if the “others” group has a single value, use it
107
+ // - if a category is already named "Others", use "…" instead
108
+ // - separate the nulls
109
+
110
+ chart = Plot .plot ({
111
+ width,
112
+ height,
113
+ style: " overflow: visible;" ,
114
+ x: {axis: null },
115
+ y: {axis: null },
116
+ marginLeft: 2 ,
117
+ marginRight: 2 ,
118
+ marginTop: 0 ,
119
+ marginBottom: 13 ,
120
+ marks: [
121
+ Plot .barX (values, Plot .stackX (stackOptions, Plot .groupZ ({x: " count" }, {z : d => visible .has (d) ? d : " Others" , insetRight: 1 , fill: " var(--theme-foreground-focus)" }))),
122
+ Plot .text (values, Plot .stackX (stackOptions, Plot .groupZ ({x: " count" , text: " first" }, {text : d => visible .has (d) ? d : " Others" , z : d => visible .has (d) ? d : " Others" , fill: " var(--plot-background)" }))),
123
+ ]
124
+ });
125
+ }
126
+
127
+ // ordinal
128
+ else if (distinct .size <= 10 ) {
129
+ const stackOptions = {order: " z" };
95
130
chart = Plot .plot ({
96
131
width,
97
132
height,
@@ -108,6 +143,7 @@ function summary({name, type, values}) {
108
143
]
109
144
});
110
145
}
146
+ // temporal, quantitative
111
147
else {
112
148
const thresholds = Math .max (10 , Math .min (50 , d3 .thresholdScott (values, min, max))); // TODO optimize thresholdScott
113
149
chart = Plot .plot ({
@@ -150,7 +186,7 @@ function summary({name, type, values}) {
150
186
151
187
<style>
152
188
153
- table .type {font-size: smaller; font-weight: normal; height: 1.35em;}
189
+ table .type {font-size: smaller; font-weight: normal; color: var(--theme-foreground-muted); height: 1.35em;}
154
190
table .summary {font-size: smaller; font-weight: normal; height: 33px;}
155
191
footer {font-family: var(--sans-serif); font-size: small; color: var(--theme-foreground-faint)}
156
192
0 commit comments