Skip to content

Commit 5d06b6f

Browse files
authored
bin interval (#735)
* bin interval * Update README * backwards-compatible intervals
1 parent 94d69a5 commit 5d06b6f

File tree

7 files changed

+196
-9
lines changed

7 files changed

+196
-9
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1423,6 +1423,7 @@ Plot.binX({y: "count", title: names => names.join("\n")}, {x: "economy (mpg)", t
14231423
To control how the quantitative dimensions *x* and *y* are divided into bins, the following options are supported:
14241424

14251425
* **thresholds** - the threshold values; see below
1426+
* **interval** - an alternative method of specifying thresholds
14261427
* **domain** - values outside the domain will be omitted
14271428
* **cumulative** - if positive, each bin will contain all lesser bins
14281429

@@ -1445,7 +1446,9 @@ The **thresholds** option may be specified as a named method or a variety of oth
14451446
* an interval or time interval (for temporal binning; see below)
14461447
* a function that returns an array, count, or time interval
14471448

1448-
If the **thresholds** option is specified as a function, it is passed three arguments: the array of input values, the domain minimum, and the domain maximum. If a number, [d3.ticks](https://github.com/d3/d3-array/blob/main/README.md#ticks) or [d3.utcTicks](https://github.com/d3/d3-time/blob/master/README.md#ticks) is used to choose suitable nice thresholds. If an interval, it must expose an *interval*.floor(*value*), *interval*.ceil(*value*), and *interval*.range(*start*, *stop*) methods. If the interval is a time interval such as d3.utcDay, or if the thresholds are specified as an array of dates, then the binned values are implicitly coerced to dates. Time intervals are intervals that are also functions that return a Date instance when called with no arguments.
1449+
If the **thresholds** option is specified as a function, it is passed three arguments: the array of input values, the domain minimum, and the domain maximum. If a number, [d3.ticks](https://github.com/d3/d3-array/blob/main/README.md#ticks) or [d3.utcTicks](https://github.com/d3/d3-time/blob/master/README.md#ticks) is used to choose suitable nice thresholds. If an interval, it must expose *interval*.floor(*value*), *interval*.ceil(*value*), *interval*.offset(*value*), and *interval*.range(*start*, *stop*) methods. If the interval is a time interval such as d3.utcDay, or if the thresholds are specified as an array of dates, then the binned values are implicitly coerced to dates. Time intervals are intervals that are also functions that return a Date instance when called with no arguments.
1450+
1451+
If the **interval** option is used instead of **thresholds**, it may be an interval, a time interval, or a number. If a number *n*, threshold values are consecutive multiples of *n* that span the domain; otherwise, the **interval** option is equivalent to the **thresholds** option. When the thresholds are specified as an interval and the default **domain** is used, the domain is automatically extended so that it starts and ends on interval boundaries.
14491452

14501453
The bin transform supports grouping in addition to binning: you can subdivide bins by up to two additional ordinal or categorical dimensions (not including faceting). If any of **z**, **fill**, or **stroke** is a channel, the first of these channels will be used to subdivide bins. Similarly, Plot.binX will group on **y** if **y** is not an output channel, and Plot.binY will group on **x** if **x** is not an output channel. For example, for a stacked histogram:
14511454

src/transforms/bin.js

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {coerceDate} from "../scales.js";
44
import {basic} from "./basic.js";
55
import {hasOutput, maybeEvaluator, maybeGroup, maybeOutput, maybeOutputs, maybeReduce, maybeSort, maybeSubgroup, reduceCount, reduceIdentity} from "./group.js";
66
import {maybeInsetX, maybeInsetY} from "./inset.js";
7+
import {maybeInterval} from "./interval.js";
78

89
// Group on {z, fill, stroke}, then optionally on y, then bin x.
910
export function binX(outputs = {y: "count"}, options = {}) {
@@ -78,6 +79,7 @@ function binn(
7879
domain, // eslint-disable-line no-unused-vars
7980
cumulative, // eslint-disable-line no-unused-vars
8081
thresholds, // eslint-disable-line no-unused-vars
82+
interval, // eslint-disable-line no-unused-vars
8183
...options
8284
} = inputs;
8385
const [GZ, setGZ] = maybeLazyChannel(z);
@@ -152,17 +154,18 @@ function binn(
152154
}
153155

154156
// Allow bin options to be specified as part of outputs; merge them into options.
155-
function mergeOptions({cumulative, domain, thresholds, ...outputs}, options) {
156-
return [outputs, {cumulative, domain, thresholds, ...options}];
157+
function mergeOptions({cumulative, domain, thresholds, interval, ...outputs}, options) {
158+
return [outputs, {cumulative, domain, thresholds, interval, ...options}];
157159
}
158160

159-
function maybeBinValue(value, {cumulative, domain, thresholds}, defaultValue) {
161+
function maybeBinValue(value, {cumulative, domain, thresholds, interval}, defaultValue) {
160162
value = {...maybeValue(value)};
161163
if (value.domain === undefined) value.domain = domain;
162164
if (value.cumulative === undefined) value.cumulative = cumulative;
163165
if (value.thresholds === undefined) value.thresholds = thresholds;
166+
if (value.interval === undefined) value.interval = interval;
164167
if (value.value === undefined) value.value = defaultValue;
165-
value.thresholds = maybeThresholds(value.thresholds);
168+
value.thresholds = maybeThresholds(value.thresholds, value.interval);
166169
return value;
167170
}
168171

@@ -194,7 +197,18 @@ function maybeBin(options) {
194197
}
195198
bin.thresholds(t).domain([min, max]);
196199
} else {
197-
bin.thresholds(thresholds).domain(domain);
200+
let d = domain;
201+
let t = thresholds;
202+
if (isInterval(t)) {
203+
let [min, max] = typeof d === "function" ? d(V) : d;
204+
if (d === extent) {
205+
min = t.floor(min);
206+
max = t.offset(t.floor(max));
207+
d = [min, max];
208+
}
209+
t = t.range(min, max);
210+
}
211+
bin.thresholds(t).domain(d);
198212
}
199213
let bins = bin(range(data)).map(binset);
200214
if (cumulative) bins = (cumulative < 0 ? bins.reverse() : bins).map(bincumset);
@@ -204,7 +218,10 @@ function maybeBin(options) {
204218
return bin;
205219
}
206220

207-
function maybeThresholds(thresholds = thresholdAuto) {
221+
function maybeThresholds(thresholds, interval) {
222+
if (thresholds === undefined) {
223+
return interval === undefined ? thresholdAuto : maybeRangeInterval(interval);
224+
}
208225
if (typeof thresholds === "string") {
209226
switch (thresholds.toLowerCase()) {
210227
case "freedman-diaconis": return thresholdFreedmanDiaconis;
@@ -217,6 +234,13 @@ function maybeThresholds(thresholds = thresholdAuto) {
217234
return thresholds; // pass array, count, or function to bin.thresholds
218235
}
219236

237+
// Unlike the interval transform, we require a range method, too.
238+
function maybeRangeInterval(interval) {
239+
interval = maybeInterval(interval);
240+
if (!isInterval(interval)) throw new Error("invalid interval");
241+
return interval;
242+
}
243+
220244
function thresholdAuto(values, min, max) {
221245
return Math.min(200, thresholdScott(values, min, max));
222246
}

src/transforms/interval.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
1+
import {range} from "d3";
12
import {labelof, maybeValue, valueof} from "../options.js";
23
import {maybeInsetX, maybeInsetY} from "./inset.js";
34

45
// TODO Allow the interval to be specified as a string, e.g. “day” or “hour”?
56
// This will require the interval knowing the type of the associated scale to
67
// choose between UTC and local time (or better, an explicit timeZone option).
7-
function maybeInterval(interval) {
8+
export function maybeInterval(interval) {
89
if (interval == null) return;
910
if (typeof interval === "number") {
1011
const n = interval;
1112
// Note: this offset doesn’t support the optional step argument for simplicity.
12-
interval = {floor: d => n * Math.floor(d / n), offset: d => d + n};
13+
return {
14+
floor: d => n * Math.floor(d / n),
15+
offset: d => d + n,
16+
range: (lo, hi) => range(Math.ceil(lo / n), hi / n).map(x => n * x)
17+
};
1318
}
1419
if (typeof interval.floor !== "function" || typeof interval.offset !== "function") throw new Error("invalid interval");
1520
return interval;

test/data/timestamps.csv

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
timestamp
2+
2022-01-31T18:16:31.964Z
3+
2022-01-30T14:37:47.021Z
4+
2022-01-29T15:04:34.007Z
5+
2022-01-29T14:23:45.360Z
6+
2022-01-28T21:42:41.078Z
7+
2022-01-28T20:12:55.377Z
8+
2022-01-28T16:23:43.560Z
9+
2022-01-28T15:22:18.908Z
10+
2022-01-27T17:26:40.052Z
11+
2022-01-26T14:55:15.640Z
12+
2022-01-25T19:33:52.923Z
13+
2022-01-25T14:53:29.893Z
14+
2022-01-24T17:17:34.008Z
15+
2022-01-24T14:10:27.382Z
16+
2022-01-23T16:33:33.685Z
17+
2022-01-23T14:52:40.121Z
18+
2022-01-21T21:39:51.625Z
19+
2022-01-21T19:50:38.138Z
20+
2022-01-21T17:32:19.085Z
21+
2022-01-21T16:06:57.899Z
22+
2022-01-21T15:52:38.116Z
23+
2022-01-21T15:01:52.420Z
24+
2022-01-20T19:12:16.275Z
25+
2022-01-19T16:47:56.095Z
26+
2022-01-18T19:04:53.190Z
27+
2022-01-18T16:14:21.534Z
28+
2022-01-18T14:24:26.530Z
29+
2022-01-16T17:48:41.245Z
30+
2022-01-16T17:32:13.164Z
31+
2022-01-14T18:11:06.535Z
32+
2022-01-13T19:07:52.806Z
33+
2022-01-13T18:04:59.779Z
34+
2022-01-13T16:40:20.998Z
35+
2022-01-13T15:59:31.069Z
36+
2022-01-12T22:17:03.540Z
37+
2022-01-12T15:43:46.363Z
38+
2022-01-12T14:09:15.628Z
39+
2022-01-11T23:05:25.974Z
40+
2022-01-11T15:00:48.222Z
41+
2022-01-11T14:50:12.751Z
42+
2022-01-10T23:42:59.140Z
43+
2022-01-10T17:21:09.829Z
44+
2022-01-08T15:10:26.196Z
45+
2022-01-08T14:00:18.029Z
46+
2022-01-07T16:50:55.843Z
47+
2022-01-05T19:06:41.335Z
48+
2022-01-05T15:25:46.501Z
49+
2022-01-05T14:33:12.087Z
50+
2022-01-05T13:31:23.154Z
51+
2022-01-04T23:36:29.180Z
52+
2022-01-04T20:55:16.325Z
53+
2022-01-04T19:35:17.920Z
54+
2022-01-04T17:01:15.535Z
55+
2022-01-04T15:15:14.705Z
56+
2022-01-03T22:12:59.816Z

test/output/intradayHistogram.svg

Lines changed: 87 additions & 0 deletions
Loading

test/plots/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ export {default as industryUnemploymentShare} from "./industry-unemployment-shar
6161
export {default as industryUnemploymentStream} from "./industry-unemployment-stream.js";
6262
export {default as industryUnemploymentTrack} from "./industry-unemployment-track.js";
6363
export {default as infinityLog} from "./infinity-log.js";
64+
export {default as intradayHistogram} from "./intraday-histogram.js";
6465
export {default as learningPoverty} from "./learning-poverty.js";
6566
export {default as letterFrequencyBar} from "./letter-frequency-bar.js";
6667
export {default as letterFrequencyCloud} from "./letter-frequency-cloud.js";

test/plots/intraday-histogram.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import * as Plot from "@observablehq/plot";
2+
import * as d3 from "d3";
3+
4+
export default async function() {
5+
const timestamps = await d3.csv("data/timestamps.csv", d3.autoType);
6+
return Plot.plot({
7+
marks: [
8+
Plot.rectY(timestamps, Plot.binX({y: "count"}, {x: d => d.timestamp.getUTCHours(), interval: 1}))
9+
]
10+
});
11+
}

0 commit comments

Comments
 (0)