|
| 1 | +import {getArrowTableSchema, isArrowTable} from "./arrow.js"; |
| 2 | +import {arrow9 as arrow, duckdb} from "./dependencies.js"; |
| 3 | +import {FileAttachment} from "./fileAttachment.js"; |
| 4 | +import {cdn} from "./require.js"; |
| 5 | + |
| 6 | +// Adapted from https://observablehq.com/@cmudig/duckdb-client |
| 7 | +// Copyright 2021 CMU Data Interaction Group |
| 8 | +// |
| 9 | +// Redistribution and use in source and binary forms, with or without |
| 10 | +// modification, are permitted provided that the following conditions are met: |
| 11 | +// |
| 12 | +// 1. Redistributions of source code must retain the above copyright notice, |
| 13 | +// this list of conditions and the following disclaimer. |
| 14 | +// |
| 15 | +// 2. Redistributions in binary form must reproduce the above copyright notice, |
| 16 | +// this list of conditions and the following disclaimer in the documentation |
| 17 | +// and/or other materials provided with the distribution. |
| 18 | +// |
| 19 | +// 3. Neither the name of the copyright holder nor the names of its contributors |
| 20 | +// may be used to endorse or promote products derived from this software |
| 21 | +// without specific prior written permission. |
| 22 | +// |
| 23 | +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 24 | +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 25 | +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 26 | +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| 27 | +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 28 | +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 29 | +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 30 | +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 31 | +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 32 | +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 33 | +// POSSIBILITY OF SUCH DAMAGE. |
| 34 | + |
| 35 | +export class DuckDBClient { |
| 36 | + constructor(db) { |
| 37 | + Object.defineProperties(this, { |
| 38 | + _db: {value: db} |
| 39 | + }); |
| 40 | + } |
| 41 | + |
| 42 | + async queryStream(query, params) { |
| 43 | + const connection = await this._db.connect(); |
| 44 | + let reader, batch; |
| 45 | + try { |
| 46 | + reader = await connection.send(query, params); |
| 47 | + batch = await reader.next(); |
| 48 | + if (batch.done) throw new Error("missing first batch"); |
| 49 | + } catch (error) { |
| 50 | + await connection.close(); |
| 51 | + throw error; |
| 52 | + } |
| 53 | + return { |
| 54 | + schema: getArrowTableSchema(batch.value), |
| 55 | + async *readRows() { |
| 56 | + try { |
| 57 | + while (!batch.done) { |
| 58 | + yield batch.value.toArray(); |
| 59 | + batch = await reader.next(); |
| 60 | + } |
| 61 | + } finally { |
| 62 | + await connection.close(); |
| 63 | + } |
| 64 | + } |
| 65 | + }; |
| 66 | + } |
| 67 | + |
| 68 | + async query(query, params) { |
| 69 | + const result = await this.queryStream(query, params); |
| 70 | + const results = []; |
| 71 | + for await (const rows of result.readRows()) { |
| 72 | + for (const row of rows) { |
| 73 | + results.push(row); |
| 74 | + } |
| 75 | + } |
| 76 | + results.schema = result.schema; |
| 77 | + return results; |
| 78 | + } |
| 79 | + |
| 80 | + async queryRow(query, params) { |
| 81 | + const result = await this.queryStream(query, params); |
| 82 | + const reader = result.readRows(); |
| 83 | + try { |
| 84 | + const {done, value} = await reader.next(); |
| 85 | + return done || !value.length ? null : value[0]; |
| 86 | + } finally { |
| 87 | + await reader.return(); |
| 88 | + } |
| 89 | + } |
| 90 | + |
| 91 | + async sql(strings, ...args) { |
| 92 | + return await this.query(strings.join("?"), args); |
| 93 | + } |
| 94 | + |
| 95 | + queryTag(strings, ...params) { |
| 96 | + return [strings.join("?"), params]; |
| 97 | + } |
| 98 | + |
| 99 | + escape(name) { |
| 100 | + return `"${name}"`; |
| 101 | + } |
| 102 | + |
| 103 | + async describeTables() { |
| 104 | + const tables = await this.query(`SHOW TABLES`); |
| 105 | + return tables.map(({name}) => ({name})); |
| 106 | + } |
| 107 | + |
| 108 | + async describeColumns({table} = {}) { |
| 109 | + const columns = await this.query(`DESCRIBE ${table}`); |
| 110 | + return columns.map(({column_name, column_type, null: nullable}) => ({ |
| 111 | + name: column_name, |
| 112 | + type: getDuckDBType(column_type), |
| 113 | + nullable: nullable !== "NO", |
| 114 | + databaseType: column_type |
| 115 | + })); |
| 116 | + } |
| 117 | + |
| 118 | + static async of(sources = {}, config = {}) { |
| 119 | + const db = await createDuckDB(); |
| 120 | + if (config.query?.castTimestampToDate === undefined) { |
| 121 | + config = {...config, query: {...config.query, castTimestampToDate: true}}; |
| 122 | + } |
| 123 | + await db.open(config); |
| 124 | + await Promise.all( |
| 125 | + Object.entries(sources).map(async ([name, source]) => { |
| 126 | + if (source instanceof FileAttachment) { // bare file |
| 127 | + await insertFile(db, name, source); |
| 128 | + } else if (isArrowTable(source)) { // bare arrow table |
| 129 | + await insertArrowTable(db, name, source); |
| 130 | + } else if (Array.isArray(source)) { // bare array of objects |
| 131 | + await insertArray(db, name, source); |
| 132 | + } else if ("data" in source) { // data + options |
| 133 | + const {data, ...options} = source; |
| 134 | + if (isArrowTable(data)) { |
| 135 | + await insertArrowTable(db, name, data, options); |
| 136 | + } else { |
| 137 | + await insertArray(db, name, data, options); |
| 138 | + } |
| 139 | + } else if ("file" in source) { // file + options |
| 140 | + const {file, ...options} = source; |
| 141 | + await insertFile(db, name, file, options); |
| 142 | + } else { |
| 143 | + throw new Error(`invalid source: ${source}`); |
| 144 | + } |
| 145 | + }) |
| 146 | + ); |
| 147 | + return new DuckDBClient(db); |
| 148 | + } |
| 149 | +} |
| 150 | + |
| 151 | +async function insertFile(database, name, file, options) { |
| 152 | + const url = await file.url(); |
| 153 | + if (url.startsWith("blob:")) { |
| 154 | + const buffer = await file.arrayBuffer(); |
| 155 | + await database.registerFileBuffer(file.name, new Uint8Array(buffer)); |
| 156 | + } else { |
| 157 | + await database.registerFileURL(file.name, url); |
| 158 | + } |
| 159 | + const connection = await database.connect(); |
| 160 | + try { |
| 161 | + switch (file.mimeType) { |
| 162 | + case "text/csv": |
| 163 | + return await connection.insertCSVFromPath(file.name, { |
| 164 | + name, |
| 165 | + schema: "main", |
| 166 | + ...options |
| 167 | + }); |
| 168 | + case "application/json": |
| 169 | + return await connection.insertJSONFromPath(file.name, { |
| 170 | + name, |
| 171 | + schema: "main", |
| 172 | + ...options |
| 173 | + }); |
| 174 | + default: |
| 175 | + if (/\.arrow$/i.test(file.name)) { |
| 176 | + const buffer = new Uint8Array(await file.arrayBuffer()); |
| 177 | + return await connection.insertArrowFromIPCStream(buffer, { |
| 178 | + name, |
| 179 | + schema: "main", |
| 180 | + ...options |
| 181 | + }); |
| 182 | + } |
| 183 | + if (/\.parquet$/i.test(file.name)) { |
| 184 | + return await connection.query( |
| 185 | + `CREATE VIEW '${name}' AS SELECT * FROM parquet_scan('${file.name}')` |
| 186 | + ); |
| 187 | + } |
| 188 | + throw new Error(`unknown file type: ${file.mimeType}`); |
| 189 | + } |
| 190 | + } finally { |
| 191 | + await connection.close(); |
| 192 | + } |
| 193 | +} |
| 194 | + |
| 195 | +async function insertArrowTable(database, name, table, options) { |
| 196 | + const arrow = await loadArrow(); |
| 197 | + const buffer = arrow.tableToIPC(table); |
| 198 | + const connection = await database.connect(); |
| 199 | + try { |
| 200 | + await connection.insertArrowFromIPCStream(buffer, { |
| 201 | + name, |
| 202 | + schema: "main", |
| 203 | + ...options |
| 204 | + }); |
| 205 | + } finally { |
| 206 | + await connection.close(); |
| 207 | + } |
| 208 | +} |
| 209 | + |
| 210 | +async function insertArray(database, name, array, options) { |
| 211 | + const arrow = await loadArrow(); |
| 212 | + const table = arrow.tableFromJSON(array); |
| 213 | + return await insertArrowTable(database, name, table, options); |
| 214 | +} |
| 215 | + |
| 216 | +async function createDuckDB() { |
| 217 | + const duck = await import(`${cdn}${duckdb.resolve()}`); |
| 218 | + const bundle = await duck.selectBundle({ |
| 219 | + mvp: { |
| 220 | + mainModule: `${cdn}${duckdb.resolve("dist/duckdb-mvp.wasm")}`, |
| 221 | + mainWorker: `${cdn}${duckdb.resolve("dist/duckdb-browser-mvp.worker.js")}` |
| 222 | + }, |
| 223 | + eh: { |
| 224 | + mainModule: `${cdn}${duckdb.resolve("dist/duckdb-eh.wasm")}`, |
| 225 | + mainWorker: `${cdn}${duckdb.resolve("dist/duckdb-browser-eh.worker.js")}` |
| 226 | + } |
| 227 | + }); |
| 228 | + const logger = new duck.ConsoleLogger(); |
| 229 | + const worker = await duck.createWorker(bundle.mainWorker); |
| 230 | + const db = new duck.AsyncDuckDB(logger, worker); |
| 231 | + await db.instantiate(bundle.mainModule); |
| 232 | + return db; |
| 233 | +} |
| 234 | + |
| 235 | +async function loadArrow() { |
| 236 | + return await import(`${cdn}${arrow.resolve()}`); |
| 237 | +} |
| 238 | + |
| 239 | +// https://duckdb.org/docs/sql/data_types/overview |
| 240 | +function getDuckDBType(type) { |
| 241 | + switch (type) { |
| 242 | + case "BIGINT": |
| 243 | + case "HUGEINT": |
| 244 | + case "UBIGINT": |
| 245 | + return "bigint"; |
| 246 | + case "DOUBLE": |
| 247 | + case "REAL": |
| 248 | + return "number"; |
| 249 | + case "INTEGER": |
| 250 | + case "SMALLINT": |
| 251 | + case "TINYINT": |
| 252 | + case "USMALLINT": |
| 253 | + case "UINTEGER": |
| 254 | + case "UTINYINT": |
| 255 | + return "integer"; |
| 256 | + case "BOOLEAN": |
| 257 | + return "boolean"; |
| 258 | + case "DATE": |
| 259 | + case "TIMESTAMP": |
| 260 | + case "TIMESTAMP WITH TIME ZONE": |
| 261 | + return "date"; |
| 262 | + case "VARCHAR": |
| 263 | + case "UUID": |
| 264 | + return "string"; |
| 265 | + // case "BLOB": |
| 266 | + // case "INTERVAL": |
| 267 | + // case "TIME": |
| 268 | + default: |
| 269 | + if (/^DECIMAL\(/.test(type)) return "integer"; |
| 270 | + return "other"; |
| 271 | + } |
| 272 | +} |
0 commit comments