Skip to content

Commit 875bf76

Browse files
Annie Zhang and mbostock authored
Add __table function and unit tests (#304)
* Support in-memory table data source * add isTypedArray check * revert makeQueryTemplate change, port isDatabaseClient, Float64Array, throw if source isn't valid * tweaks to __table (#305) * tweaks to __table * match expected schema * “resolved” table filter operands * test against sort mutation * remove export * filter eq for dates, lte, gte * isDataArray * move comment * improve error message Co-authored-by: Mike Bostock <[email protected]>
1 parent 3068f6a commit 875bf76

File tree

5 files changed

+552
-166
lines changed

5 files changed

+552
-166
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"dist/**/*.js"
2323
],
2424
"dependencies": {
25+
"d3-array": "^3.2.0",
2526
"d3-dsv": "^2.0.0",
2627
"d3-require": "^1.3.0"
2728
},

src/index.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
export {default as FileAttachments, AbstractFile} from "./fileAttachment.mjs";
22
export {default as Library} from "./library.mjs";
3-
export {makeQueryTemplate} from "./table.mjs";
3+
export {makeQueryTemplate, isDataArray, isDatabaseClient} from "./table.mjs";

src/table.mjs

Lines changed: 265 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,153 @@
1+
import {ascending, descending, reverse} from "d3-array";
2+
3+
const nChecks = 20; // number of values to check in each array
4+
5+
// We support two levels of DatabaseClient. The simplest DatabaseClient
// implements only the client.sql tagged template literal. More advanced
// DatabaseClients implement client.query and client.queryStream, which support
// streaming and abort, and the client.queryTag tagged template literal is used
// to translate the contents of a SQL cell or Table cell into the appropriate
// arguments for calling client.query or client.queryStream. For table cells
// (mode === "table"), we additionally require client.describeColumns. The
// client.describeTables method is optional.
//
// Always returns a strict boolean: a falsy value (null, undefined, 0, "")
// yields false rather than being passed through to the caller.
export function isDatabaseClient(value, mode) {
  return Boolean(
    value &&
      (typeof value.sql === "function" ||
        (typeof value.queryTag === "function" &&
          (typeof value.query === "function" ||
            typeof value.queryStream === "function"))) &&
      (mode !== "table" || typeof value.describeColumns === "function") &&
      value !== __query // don’t match our internal helper
  );
}
24+
25+
// Decides whether a value can be treated as an in-memory table. A typed array
// qualifies (as a single-column table). A plain array qualifies when it carries
// a valid schema or columns property, or when its elements look consistently
// like objects, like primitives of one type, or like dates.
export function isDataArray(value) {
  if (!Array.isArray(value)) return isTypedArray(value);
  if (isQueryResultSetSchema(value.schema)) return true;
  if (isQueryResultSetColumns(value.columns)) return true;
  if (arrayContainsObjects(value)) return true;
  if (arrayContainsPrimitives(value)) return true;
  return arrayContainsDates(value);
}
39+
40+
// Reports whether an array looks like a table of row objects. Only the first
// nChecks elements are inspected: each must be a non-null object (no
// primitives), and the very first element must expose at least one enumerable
// key. The contents are assumed to be homogenous, but this is not enforced
// beyond the sampled elements. An empty array never qualifies.
// https://observablehq.com/@observablehq/database-client-specification#§1
function arrayContainsObjects(value) {
  const limit = Math.min(nChecks, value.length);
  if (!(limit > 0)) return false;
  for (let index = 0; index < limit; index += 1) {
    const item = value[index];
    const isObject = item !== null && typeof item === "object";
    if (!isObject) return false;
  }
  return objectHasEnumerableKeys(value[0]);
}
54+
55+
// Reports whether the value has at least one enumerable key. A for-in loop is
// used so that iteration can stop at the first key found; Object.keys would
// have to materialize every key first, which is slower for large objects. Note
// that for-in also visits inherited enumerable keys. Callers are expected to
// pass an object; see arrayContainsObjects.
function objectHasEnumerableKeys(value) {
  let found = false;
  for (const key in value) {
    found = true;
    break;
  }
  return found;
}
63+
64+
// Reports whether the value is an array in which every entry is truthy and has
// a string name property. An empty array qualifies.
function isQueryResultSetSchema(schemas) {
  if (!Array.isArray(schemas)) return false;
  const isInvalid = (entry) => !entry || typeof entry.name !== "string";
  return !schemas.some(isInvalid);
}
67+
68+
// Reports whether the value is an array consisting only of strings (column
// names). An empty array qualifies.
function isQueryResultSetColumns(columns) {
  if (!Array.isArray(columns)) return false;
  const isNotString = (name) => typeof name !== "string";
  return !columns.some(isNotString);
}
71+
72+
// Reports whether the value represents a single-column table: a typed array,
// an array of consistently-typed primitives, or an array of dates. This should
// only be passed values that were already accepted as displayable table data
// (the original note names canDisplayTable, which is not defined in this
// module — presumably isDataArray; confirm).
function arrayIsPrimitive(value) {
  if (isTypedArray(value)) return true;
  if (arrayContainsPrimitives(value)) return true;
  return arrayContainsDates(value);
}
82+
83+
// Reports whether the first nChecks elements of an array are primitives
// (number, string, boolean, bigint) that all share one type. Null and
// undefined elements are skipped, but at least one actual primitive must be
// seen; an empty array or one holding only nulls does not qualify.
function arrayContainsPrimitives(value) {
  const limit = Math.min(nChecks, value.length);
  let expected = null; // typeof the first non-null element, once seen
  for (let index = 0; index < limit; index += 1) {
    const item = value[index];
    if (item == null) continue; // ignore null and undefined
    const kind = typeof item;
    if (expected === null) {
      const supported =
        kind === "number" ||
        kind === "boolean" ||
        kind === "string" ||
        kind === "bigint";
      if (!supported) return false;
      expected = kind;
    } else if (kind !== expected) {
      return false;
    }
  }
  return expected !== null;
}
112+
113+
// Reports whether the first nChecks elements of an array are Date instances.
// Null and undefined elements are skipped, but at least one Date must be seen;
// an empty array or one holding only nulls does not qualify.
function arrayContainsDates(value) {
  const limit = Math.min(nChecks, value.length);
  let sawDate = false;
  for (let index = 0; index < limit; index += 1) {
    const item = value[index];
    if (item == null) continue; // ignore null and undefined
    if (item instanceof Date) {
      sawDate = true;
    } else {
      return false;
    }
  }
  return sawDate;
}
126+
127+
// Returns true if the value is an instance of one of the built-in typed array
// classes. The 64-bit BigInt variants are included so that typed arrays of
// bigint are treated like plain arrays of bigint, which
// arrayContainsPrimitives already accepts. Plain arrays, ArrayBuffer, and
// DataView are not typed arrays and return false.
function isTypedArray(value) {
  return (
    value instanceof Int8Array ||
    value instanceof Int16Array ||
    value instanceof Int32Array ||
    value instanceof Uint8Array ||
    value instanceof Uint8ClampedArray ||
    value instanceof Uint16Array ||
    value instanceof Uint32Array ||
    value instanceof Float32Array ||
    value instanceof Float64Array ||
    value instanceof BigInt64Array ||
    value instanceof BigUint64Array
  );
}
140+
141+
// __query is used by table cells; __query.sql is used by SQL cells.
1142
export const __query = Object.assign(
2-
// This function is used by table cells.
3143
async (source, operations, invalidation) => {
4-
const args = makeQueryTemplate(operations, await source);
5-
if (!args) return null; // the empty state
6-
return evaluateQuery(await source, args, invalidation);
144+
source = await source;
145+
if (isDatabaseClient(source)) return evaluateQuery(source, makeQueryTemplate(operations, source), invalidation);
146+
if (isDataArray(source)) return __table(source, operations);
147+
if (!source) throw new Error("missing data source");
148+
throw new Error("invalid data source");
7149
},
8150
{
9-
// This function is used by SQL cells.
10151
sql(source, invalidation) {
11152
return async function () {
12153
return evaluateQuery(source, arguments, invalidation);
@@ -16,7 +157,7 @@ export const __query = Object.assign(
16157
);
17158

18159
async function evaluateQuery(source, args, invalidation) {
19-
if (!source) return;
160+
if (!source) throw new Error("missing data source");
20161

21162
// If this DatabaseClient supports abort and streaming, use that.
22163
if (typeof source.queryTag === "function") {
@@ -73,17 +214,15 @@ async function* accumulateQuery(queryRequest) {
73214
* of sub-strings and params are the parameter values to be inserted between each
74215
* sub-string.
75216
*/
76-
export function makeQueryTemplate(operations, source) {
217+
export function makeQueryTemplate(operations, source) {
77218
const escaper =
78-
source && typeof source.escape === "function" ? source.escape : (i) => i;
219+
typeof source.escape === "function" ? source.escape : (i) => i;
79220
const {select, from, filter, sort, slice} = operations;
80-
if (
81-
from.table === null ||
82-
select.columns === null ||
83-
(select.columns && select.columns.length === 0)
84-
)
85-
return;
86-
const columns = select.columns.map((c) => `t.${escaper(c)}`);
221+
if (!from.table)
222+
throw new Error("missing from table");
223+
if (select.columns?.length === 0)
224+
throw new Error("at least one column must be selected");
225+
const columns = select.columns ? select.columns.map((c) => `t.${escaper(c)}`) : "*";
87226
const args = [
88227
[`SELECT ${columns} FROM ${formatTable(from.table, escaper)} t`]
89228
];
@@ -108,7 +247,7 @@ async function* accumulateQuery(queryRequest) {
108247
}
109248

110249
function formatTable(table, escaper) {
111-
if (typeof table === "object") {
250+
if (typeof table === "object") { // i.e., not a bare string specifier
112251
let from = "";
113252
if (table.database != null) from += escaper(table.database) + ".";
114253
if (table.schema != null) from += escaper(table.schema) + ".";
@@ -231,3 +370,113 @@ function likeOperand(operand) {
231370
return {...operand, value: `%${operand.value}%`};
232371
}
233372

373+
// This function applies table cell operations to an in-memory table (array of
// objects); it should be equivalent to the corresponding SQL query.
//
// source: an array of row objects, or a single-column table (typed array or
//   array of primitives/dates), which is first expanded into {value} rows.
// operations: {select: {columns}, filter: [{type, operands}], sort:
//   [{column, direction}], slice: {from, to}}. The first operand of a filter
//   names the column; the remaining operands supply the comparison values.
//
// Returns the resulting array of rows. The input array is never mutated: it is
// returned as-is when no operation changes it, and copied before sorting. Any
// schema or columns property on the source is carried over to a new result
// (narrowed to the selected columns when a selection is given).
//
// Throws an Error for an unknown filter type.
export function __table(source, operations) {
  if (arrayIsPrimitive(source)) source = Array.from(source, (value) => ({value}));
  const input = source;
  let {schema, columns} = source;
  for (const {type, operands} of operations.filter) {
    const [{value: column}] = operands;
    const values = operands.slice(1).map(({value}) => value);
    switch (type) {
      case "eq": {
        const [value] = values;
        if (value instanceof Date) {
          const time = +value; // compare as primitive
          source = source.filter((d) => +d[column] === time);
        } else {
          source = source.filter((d) => d[column] === value);
        }
        break;
      }
      case "ne": {
        const [value] = values;
        if (value instanceof Date) {
          // Distinct Date objects are never ===, so compare as primitive
          // (mirrors "eq"); otherwise no row would ever be excluded.
          const time = +value;
          source = source.filter((d) => +d[column] !== time);
        } else {
          source = source.filter((d) => d[column] !== value);
        }
        break;
      }
      case "c": {
        const [value] = values;
        source = source.filter(
          (d) => typeof d[column] === "string" && d[column].includes(value)
        );
        break;
      }
      case "nc": {
        const [value] = values;
        source = source.filter(
          (d) => typeof d[column] === "string" && !d[column].includes(value)
        );
        break;
      }
      case "in": {
        const isMember = tableMembershipTest(values);
        source = source.filter((d) => isMember(d[column]));
        break;
      }
      case "nin": {
        const isMember = tableMembershipTest(values);
        source = source.filter((d) => !isMember(d[column]));
        break;
      }
      case "n": {
        source = source.filter((d) => d[column] == null);
        break;
      }
      case "nn": {
        source = source.filter((d) => d[column] != null);
        break;
      }
      case "lt": {
        const [value] = values;
        source = source.filter((d) => d[column] < value);
        break;
      }
      case "lte": {
        const [value] = values;
        source = source.filter((d) => d[column] <= value);
        break;
      }
      case "gt": {
        const [value] = values;
        source = source.filter((d) => d[column] > value);
        break;
      }
      case "gte": {
        const [value] = values;
        source = source.filter((d) => d[column] >= value);
        break;
      }
      default:
        throw new Error(`unknown filter type: ${type}`);
    }
  }
  // Sort keys are applied last-to-first; this yields multi-key ordering as
  // long as Array.prototype.sort is stable.
  for (const {column, direction} of reverse(operations.sort)) {
    const compare = direction === "desc" ? descending : ascending;
    if (source === input) source = source.slice(); // defensive copy
    source.sort((a, b) => compare(a[column], b[column]));
  }
  let {from, to} = operations.slice;
  from = from == null ? 0 : Math.max(0, from);
  to = to == null ? Infinity : Math.max(0, to);
  if (from > 0 || to < Infinity) {
    source = source.slice(from, to);
  }
  if (operations.select.columns) {
    if (schema) {
      const schemaByName = new Map(schema.map((s) => [s.name, s]));
      schema = operations.select.columns.map((c) => schemaByName.get(c));
    }
    if (columns) {
      columns = operations.select.columns;
    }
    source = source.map((d) =>
      Object.fromEntries(operations.select.columns.map((c) => [c, d[c]]))
    );
  }
  // Only annotate arrays created here; never write onto the caller’s input.
  if (source !== input) {
    if (schema) source.schema = schema;
    if (columns) source.columns = columns;
  }
  return source;
}

// Builds a membership predicate for the "in" and "nin" filters. When none of
// the operand values is a Date, membership is plain Set lookup. When at least
// one is a Date, both the operands and the tested value are reduced to their
// timestamps first, because distinct Date objects are never equal by identity.
function tableMembershipTest(values) {
  if (!values.some((v) => v instanceof Date)) {
    const set = new Set(values);
    return (x) => set.has(x);
  }
  const comparable = (v) => (v instanceof Date ? +v : v);
  const set = new Set(values.map(comparable));
  return (x) => set.has(comparable(x));
}

0 commit comments

Comments
 (0)