Skip to content

Add __table function and unit tests #304

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"dist/**/*.js"
],
"dependencies": {
"d3-array": "^3.2.0",
"d3-dsv": "^2.0.0",
"d3-require": "^1.3.0"
},
Expand Down
2 changes: 1 addition & 1 deletion src/index.mjs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export {default as FileAttachments, AbstractFile} from "./fileAttachment.mjs";
export {default as Library} from "./library.mjs";
export {makeQueryTemplate} from "./table.mjs";
export {makeQueryTemplate, isDataArray, isDatabaseClient} from "./table.mjs";
281 changes: 265 additions & 16 deletions src/table.mjs
Original file line number Diff line number Diff line change
@@ -1,12 +1,153 @@
import {ascending, descending, reverse} from "d3-array";

const nChecks = 20; // upper bound on how many leading elements the array-sniffing helpers inspect

// A value qualifies as a DatabaseClient at one of two levels:
//
// - basic: it exposes the client.sql tagged template literal;
// - advanced: it exposes client.queryTag together with client.query and/or
//   client.queryStream (which support streaming and abort). client.queryTag
//   translates the contents of a SQL cell or Table cell into the arguments
//   for calling client.query or client.queryStream.
//
// When mode is "table", client.describeColumns is additionally required;
// client.describeTables is always optional. The internal __query helper is
// never treated as a client. A falsy value is handed back unchanged, so the
// result is only guaranteed to be truthy or falsy, not strictly boolean.
export function isDatabaseClient(value, mode) {
  if (!value) return value;
  const isFunction = (candidate) => typeof candidate === "function";
  const supportsQuerying =
    isFunction(value.sql) ||
    (isFunction(value.queryTag) &&
      (isFunction(value.query) || isFunction(value.queryStream)));
  if (!supportsQuerying) return false;
  if (mode === "table" && !isFunction(value.describeColumns)) return false;
  return value !== __query; // our internal helper must not match
}

// Decides whether a value can be treated as tabular data. A typed array always
// qualifies (as a single-column table). A plain array qualifies when it
// carries a recognizable schema or columns property, or when its leading
// elements are consistently typed: plain objects, primitives, or dates.
export function isDataArray(value) {
  if (!Array.isArray(value)) return isTypedArray(value);
  if (isQueryResultSetSchema(value.schema)) return true;
  if (isQueryResultSetColumns(value.columns)) return true;
  if (arrayContainsObjects(value)) return true;
  if (arrayContainsPrimitives(value)) return true;
  if (arrayContainsDates(value)) return true;
  return isTypedArray(value);
}

// Reports whether an array looks like a table of row objects: none of the
// leading elements we sample may be null or a primitive, and the first element
// must expose at least one enumerable key (see computeSchema for how the
// columns are inferred). The contents are assumed to be homogenous, but this
// is not currently enforced.
// https://observablehq.com/@observablehq/database-client-specification#§1
function arrayContainsObjects(value) {
  const limit = Math.min(nChecks, value.length);
  if (!(limit > 0)) return false;
  let index = 0;
  while (index < limit) {
    const item = value[index];
    if (typeof item !== "object" || item === null) return false;
    index += 1;
  }
  return objectHasEnumerableKeys(value[0]);
}

// Reports whether the value has at least one enumerable key. A for-in loop
// lets us stop at the first key found, whereas Object.keys would materialize
// the array of all keys (considerably slower when there are many). The value
// is assumed to be an object; see arrayContainsObjects.
function objectHasEnumerableKeys(value) {
  let found = false;
  for (const key in value) {
    found = true;
    break;
  }
  return found;
}

// Reports whether the value is an array in which every entry is truthy and
// has a string name property.
function isQueryResultSetSchema(schemas) {
  if (!Array.isArray(schemas)) return false;
  const hasStringName = (entry) => Boolean(entry) && typeof entry.name === "string";
  return schemas.every(hasStringName);
}

// Reports whether the value is an array consisting solely of strings.
function isQueryResultSetColumns(columns) {
  if (!Array.isArray(columns)) return false;
  const isString = (name) => typeof name === "string";
  return columns.every(isString);
}

// Reports whether the value is a single-column table: a typed array, or an
// array whose leading elements are primitives or dates. Callers are expected
// to pass only values already accepted as tabular data (e.g. by isDataArray).
function arrayIsPrimitive(value) {
  if (isTypedArray(value)) return true;
  if (arrayContainsPrimitives(value)) return true;
  return arrayContainsDates(value);
}

// Reports whether the leading elements of an array are primitives (number,
// boolean, string, bigint) that all share one type. Null and undefined entries
// are skipped, but at least one actual primitive must be present.
function arrayContainsPrimitives(value) {
  const limit = Math.min(nChecks, value.length);
  if (!(limit > 0)) return false;
  const allowedTypes = ["number", "boolean", "string", "bigint"];
  let expectedType = null; // set once the first non-null element is seen
  for (let index = 0; index < limit; index += 1) {
    const item = value[index];
    if (item === null || item === undefined) continue;
    const itemType = typeof item;
    if (expectedType === null) {
      if (!allowedTypes.includes(itemType)) return false;
      expectedType = itemType;
    } else if (itemType !== expectedType) {
      return false;
    }
  }
  return expectedType !== null;
}

// Reports whether the leading elements of an array are all Date instances.
// Null and undefined entries are skipped, but at least one Date must be
// present.
function arrayContainsDates(value) {
  const limit = Math.min(nChecks, value.length);
  if (!(limit > 0)) return false;
  let dateCount = 0;
  for (let index = 0; index < limit; index += 1) {
    const item = value[index];
    if (item === null || item === undefined) continue;
    if (item instanceof Date) dateCount += 1;
    else return false;
  }
  return dateCount > 0;
}

// Reports whether the value is an instance of one of the numeric typed array
// classes listed below.
function isTypedArray(value) {
  const typedArrayClasses = [
    Int8Array,
    Int16Array,
    Int32Array,
    Uint8Array,
    Uint8ClampedArray,
    Uint16Array,
    Uint32Array,
    Float32Array,
    Float64Array
  ];
  return typedArrayClasses.some((TypedArray) => value instanceof TypedArray);
}

// __query is used by table cells; __query.sql is used by SQL cells.
export const __query = Object.assign(
// This function is used by table cells.
async (source, operations, invalidation) => {
const args = makeQueryTemplate(operations, await source);
if (!args) return null; // the empty state
return evaluateQuery(await source, args, invalidation);
source = await source;
if (isDatabaseClient(source)) return evaluateQuery(source, makeQueryTemplate(operations, source), invalidation);
if (isDataArray(source)) return __table(source, operations);
if (!source) throw new Error("missing data source");
throw new Error("invalid data source");
},
{
// This function is used by SQL cells.
sql(source, invalidation) {
return async function () {
return evaluateQuery(source, arguments, invalidation);
Expand All @@ -16,7 +157,7 @@ export const __query = Object.assign(
);

async function evaluateQuery(source, args, invalidation) {
if (!source) return;
if (!source) throw new Error("missing data source");

// If this DatabaseClient supports abort and streaming, use that.
if (typeof source.queryTag === "function") {
Expand Down Expand Up @@ -73,17 +214,15 @@ async function* accumulateQuery(queryRequest) {
* of sub-strings and params are the parameter values to be inserted between each
* sub-string.
*/
export function makeQueryTemplate(operations, source) {
export function makeQueryTemplate(operations, source) {
const escaper =
source && typeof source.escape === "function" ? source.escape : (i) => i;
typeof source.escape === "function" ? source.escape : (i) => i;
const {select, from, filter, sort, slice} = operations;
if (
from.table === null ||
select.columns === null ||
(select.columns && select.columns.length === 0)
)
return;
const columns = select.columns.map((c) => `t.${escaper(c)}`);
if (!from.table)
throw new Error("missing from table");
if (select.columns?.length === 0)
throw new Error("at least one column must be selected");
const columns = select.columns ? select.columns.map((c) => `t.${escaper(c)}`) : "*";
const args = [
[`SELECT ${columns} FROM ${formatTable(from.table, escaper)} t`]
];
Expand All @@ -108,7 +247,7 @@ async function* accumulateQuery(queryRequest) {
}

function formatTable(table, escaper) {
if (typeof table === "object") {
if (typeof table === "object") { // i.e., not a bare string specifier
let from = "";
if (table.database != null) from += escaper(table.database) + ".";
if (table.schema != null) from += escaper(table.schema) + ".";
Expand Down Expand Up @@ -231,3 +370,113 @@ function likeOperand(operand) {
return {...operand, value: `%${operand.value}%`};
}

// Applies table cell operations (filter, then sort, then slice, then select)
// to an in-memory table; the result should be equivalent to the corresponding
// SQL query.
//
// source: an array of row objects, or an array / typed array of primitives or
//   dates, which is first wrapped into rows of the form {value}.
// operations: {select: {columns}, filter: [{type, operands}],
//   sort: [{column, direction}], slice: {from, to}}. The first operand of each
//   filter names the column; the remaining operands carry comparison values.
//
// Returns the resulting array. The given array is never mutated; when no
// operation changes anything, the (possibly wrapped) input is returned as-is.
// Any schema or columns property on the source is carried over to a derived
// result, narrowed to the selected columns when a selection applies.
// Throws an Error for an unknown filter type.
export function __table(source, operations) {
  if (arrayIsPrimitive(source)) source = Array.from(source, (value) => ({value}));
  const input = source;
  let {schema, columns} = source;
  // Two Date objects are never strictly equal, even for the same instant, so
  // whenever an operand is a Date we compare timestamps instead.
  const isDate = (value) => value instanceof Date;
  const toComparable = (value) => (isDate(value) ? +value : value);
  for (const {type, operands} of operations.filter) {
    const [{value: column}] = operands;
    const values = operands.slice(1).map(({value}) => value);
    switch (type) {
      case "eq": {
        const [value] = values;
        if (isDate(value)) {
          const time = +value; // compare as primitive
          source = source.filter((d) => +d[column] === time);
        } else {
          source = source.filter((d) => d[column] === value);
        }
        break;
      }
      case "ne": {
        const [value] = values;
        if (isDate(value)) {
          const time = +value; // compare as primitive, mirroring "eq"
          source = source.filter((d) => +d[column] !== time);
        } else {
          source = source.filter((d) => d[column] !== value);
        }
        break;
      }
      case "c": {
        const [value] = values;
        source = source.filter(
          (d) => typeof d[column] === "string" && d[column].includes(value)
        );
        break;
      }
      case "nc": {
        const [value] = values;
        source = source.filter(
          (d) => typeof d[column] === "string" && !d[column].includes(value)
        );
        break;
      }
      case "in": {
        if (values.some(isDate)) {
          const set = new Set(values.map(toComparable));
          source = source.filter((d) => set.has(toComparable(d[column])));
        } else {
          const set = new Set(values);
          source = source.filter((d) => set.has(d[column]));
        }
        break;
      }
      case "nin": {
        if (values.some(isDate)) {
          const set = new Set(values.map(toComparable));
          source = source.filter((d) => !set.has(toComparable(d[column])));
        } else {
          const set = new Set(values);
          source = source.filter((d) => !set.has(d[column]));
        }
        break;
      }
      case "n": {
        source = source.filter((d) => d[column] == null);
        break;
      }
      case "nn": {
        source = source.filter((d) => d[column] != null);
        break;
      }
      case "lt": {
        const [value] = values;
        source = source.filter((d) => d[column] < value);
        break;
      }
      case "lte": {
        const [value] = values;
        source = source.filter((d) => d[column] <= value);
        break;
      }
      case "gt": {
        const [value] = values;
        source = source.filter((d) => d[column] > value);
        break;
      }
      case "gte": {
        const [value] = values;
        source = source.filter((d) => d[column] >= value);
        break;
      }
      default:
        throw new Error(`unknown filter type: ${type}`);
    }
  }
  // Sort keys are applied last-to-first so that, with a stable sort, the first
  // key ends up as the primary ordering.
  for (const {column, direction} of reverse(operations.sort)) {
    const compare = direction === "desc" ? descending : ascending;
    if (source === input) source = source.slice(); // defensive copy
    source.sort((a, b) => compare(a[column], b[column]));
  }
  let {from, to} = operations.slice;
  from = from == null ? 0 : Math.max(0, from);
  to = to == null ? Infinity : Math.max(0, to);
  if (from > 0 || to < Infinity) {
    source = source.slice(from, to); // bounds were already clamped above
  }
  if (operations.select.columns) {
    if (schema) {
      const schemaByName = new Map(schema.map((s) => [s.name, s]));
      schema = operations.select.columns.map((c) => schemaByName.get(c));
    }
    if (columns) {
      columns = operations.select.columns;
    }
    source = source.map((d) =>
      Object.fromEntries(operations.select.columns.map((c) => [c, d[c]]))
    );
  }
  // Only annotate arrays we created; the caller's array is left untouched.
  if (source !== input) {
    if (schema) source.schema = schema;
    if (columns) source.columns = columns;
  }
  return source;
}
Loading