Skip to content

Commit 2f1d653

Browse files
Dvir Yitzchaki, dvirtz
Dvir Yitzchaki
authored and committed
replace exec with spawn to support large parquets. fixes llvm#1
1 parent 03f5e3e commit 2f1d653

File tree

5 files changed

+1361
-22
lines changed

5 files changed

+1361
-22
lines changed

src/extension.ts

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,15 +3,15 @@
33
// Import the module and reference it with the alias vscode in your code below
44
import * as vscode from 'vscode';
55
import { ParquetContentProvider } from './parquet_content_provider';
6-
import { execFile } from 'child_process';
6+
import { spawn } from 'child_process';
77

88
// this method is called when your extension is activated
99
// your extension is activated the very first time the command is executed
1010
export function activate(context: vscode.ExtensionContext) {
1111
console.log('parquet-viewer activated');
1212

13-
execFile('parquet-tools', ['-h'], err => {
14-
vscode.window.showErrorMessage('parquet-tools not in PATH');
13+
spawn('parquet-tools', ['-h']).on('error', (err) => {
14+
vscode.window.showErrorMessage('parquet-tools not in PATH');
1515
});
1616

1717
const scheme = 'parquet';
@@ -21,8 +21,8 @@ export function activate(context: vscode.ExtensionContext) {
2121

2222
let onFile = function (document: vscode.TextDocument) {
2323
if (document.fileName.endsWith('parquet') && document.uri.scheme !== scheme) {
24-
let uri = vscode.Uri.parse(scheme + '://' + document.uri.path);
25-
vscode.window.showTextDocument(uri, { preview: true, viewColumn: vscode.window.activeTextEditor!.viewColumn });
24+
let uri = vscode.Uri.parse(scheme + '://' + document.uri.path + ".as.json");
25+
vscode.window.showTextDocument(uri);
2626
}
2727
};
2828

@@ -36,6 +36,9 @@ export function activate(context: vscode.ExtensionContext) {
3636
}));
3737

3838
context.subscriptions.push(vscode.workspace.onDidOpenTextDocument(onFile));
39+
context.subscriptions.push(vscode.workspace.onDidChangeTextDocument((e) => {
40+
onFile(e.document);
41+
}));
3942

4043
if (vscode.window.activeTextEditor) {
4144
onFile(vscode.window.activeTextEditor.document);

src/parquet_content_provider.ts

Lines changed: 38 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,56 @@
11
import { TextDocumentContentProvider, EventEmitter, Uri, window } from "vscode";
2-
import { exec } from "child_process";
2+
import { spawn } from "child_process";
3+
// import { Readable } from "stream";
4+
// import { tmpdir } from "os";
5+
// import { createWriteStream, readFile, readFileSync } from 'fs';
6+
// import { sep } from 'path';
7+
8+
9+
class Json {
10+
data: string = "";
11+
}
312

413
export class ParquetContentProvider implements TextDocumentContentProvider {
514

15+
private jsons: Map<string, Json> = new Map();
16+
617
// emitter and its event
718
onDidChangeEmitter = new EventEmitter<Uri>();
819
onDidChange = this.onDidChangeEmitter.event;
920

1021
async provideTextDocumentContent(uri: Uri): Promise<string> {
11-
// simply invoke cowsay, use uri-path as text
1222
return new Promise<string>((resolve, reject) => {
13-
exec('parquet-tools cat -j ' + uri.path, (error, stdout, stderr) => {
14-
if (error) {
15-
const message = `error when running parquet-tools ${error}:\n${stderr}`;
23+
24+
const path = uri.path.replace(RegExp('\.as\.json$'), '');
25+
26+
if (this.jsons.has(path)) {
27+
resolve(this.jsons.get(path)!.data);
28+
}
29+
30+
var json = new Json;
31+
this.jsons.set(path, json);
32+
33+
const parquet_tools = spawn('parquet-tools', ['cat', '-j', path]);
34+
// parquet_tools.stdout.pipe(stream)
35+
var stderr: string = "";
36+
parquet_tools.stderr.on('data', (data) => {
37+
stderr += data;
38+
});
39+
parquet_tools.stdout.on('data', (data) => {
40+
json.data += data;
41+
this.onDidChangeEmitter.fire(uri);
42+
});
43+
44+
parquet_tools.on('close', (code) => {
45+
if (code) {
46+
const message = `error when running parquet-tools ${code}:\n${stderr}`;
1647
window.showErrorMessage(message);
1748
reject(message);
1849
}
1950

20-
resolve(stdout);
51+
resolve(json.data);
2152
});
2253
});
2354
}
55+
2456
}

src/test/provider.test.ts

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,25 +7,27 @@ import { Uri } from 'vscode';
77

88
suite("Provider tests", () => {
99

10-
test("Parquet to JSON", async () => {
10+
test('Converts Parquet to JSON', (done) => {
1111
const provider = new ParquetContentProvider();
12-
const json = getUri("small.parquet").then(parquet => {
13-
return provider.provideTextDocumentContent(parquet);
14-
});
15-
const expected = fileRead("small.json");
12+
["small", "large"].forEach(async (name) => {
13+
const json = getUri(`${name}.parquet`).then(parquet => {
14+
return provider.provideTextDocumentContent(parquet);
15+
});
16+
const expected = fileRead(`${name}.json`);
1617

17-
return Promise.all([json, expected]).then(values => {
18-
assert.strictEqual(values[0], values[1]);
18+
Promise.all([json, expected]).then((values) =>
19+
assert.strictEqual(values[0], values[1]))
20+
.then(done, done);
1921
});
2022
});
2123

2224
test("Error on not existing file", async () => {
2325
const provider = new ParquetContentProvider();
2426

2527
return provider.provideTextDocumentContent(Uri.parse("file://.")).then(data => {
26-
assert(false, "should not get here");
27-
}, (error: string) => {
28-
assert(error.indexOf('error when running parquet-tools') !== -1);
29-
});
28+
assert(false, "should not get here");
29+
}, (error: string) => {
30+
assert(error.indexOf('error when running parquet-tools') !== -1);
31+
});
3032
});
3133
});

0 commit comments

Comments
 (0)