From 31054fe79c85c60593770beadedd223475b17943 Mon Sep 17 00:00:00 2001 From: sharon wang <25834218+sharon-wang@users.noreply.github.com> Date: Mon, 21 Oct 2024 14:49:52 -0400 Subject: [PATCH 1/5] use `pathToFileURL` to construct the Windows path --- extensions/positron-duckdb/src/extension.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/extensions/positron-duckdb/src/extension.ts b/extensions/positron-duckdb/src/extension.ts index b9af790061f..8263cd90a20 100644 --- a/extensions/positron-duckdb/src/extension.ts +++ b/extensions/positron-duckdb/src/extension.ts @@ -32,6 +32,7 @@ import * as duckdb from '@duckdb/duckdb-wasm'; import Worker from 'web-worker'; import { basename, extname, join } from 'path'; import { Table } from 'apache-arrow'; +import { pathToFileURL } from 'url'; class DuckDBInstance { constructor(readonly db: duckdb.AsyncDuckDB, readonly con: duckdb.AsyncDuckDBConnection) { } @@ -45,6 +46,12 @@ class DuckDBInstance { mainModule: join(distPath, 'duckdb-eh.wasm'), mainWorker: join(distPath, 'duckdb-node-eh.worker.cjs') }; + // On Windows, we need to call pathToFileURL on mainModule and mainWorkerto get the correct path format + if (process.platform === 'win32') { + bundle.mainModule = pathToFileURL(bundle.mainModule).toString(); + bundle.mainWorker = pathToFileURL(bundle.mainWorker).toString(); + } + const logger = new duckdb.VoidLogger(); const worker = new Worker(bundle.mainWorker); From 449b43317bb2847c617bbe1e9ebf96116cab1dda Mon Sep 17 00:00:00 2001 From: sharon wang <25834218+sharon-wang@users.noreply.github.com> Date: Mon, 21 Oct 2024 14:52:15 -0400 Subject: [PATCH 2/5] don't create the file URL for the mainModule --- extensions/positron-duckdb/src/extension.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/extensions/positron-duckdb/src/extension.ts b/extensions/positron-duckdb/src/extension.ts index 8263cd90a20..09571db17fa 100644 --- a/extensions/positron-duckdb/src/extension.ts +++ b/extensions/positron-duckdb/src/extension.ts @@ -46,9 +46,11 @@ class DuckDBInstance { mainModule: join(distPath, 'duckdb-eh.wasm'), mainWorker: join(distPath, 'duckdb-node-eh.worker.cjs') }; - // On Windows, we need to call pathToFileURL on mainModule and mainWorkerto get the correct path format + + // On Windows, we need to call pathToFileURL on mainWorker because the web-worker package + // does not support Windows paths that start with a drive letter. if (process.platform === 'win32') { - bundle.mainModule = pathToFileURL(bundle.mainModule).toString(); + // Example: file:///c:/Users/sharon/positron/extensions/positron-duckdb/node_modules/@duckdb/duckdb-wasm/dist/duckdb-node-eh.worker.cjs bundle.mainWorker = pathToFileURL(bundle.mainWorker).toString(); } From f65e41a4958f71765d4944f9effbb2314119cd0e Mon Sep 17 00:00:00 2001 From: sharon wang <25834218+sharon-wang@users.noreply.github.com> Date: Mon, 21 Oct 2024 17:42:33 -0400 Subject: [PATCH 3/5] add handling for Windows dataset path --- extensions/positron-duckdb/src/extension.ts | 31 +++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/extensions/positron-duckdb/src/extension.ts b/extensions/positron-duckdb/src/extension.ts index 09571db17fa..ecf0f33b92b 100644 --- a/extensions/positron-duckdb/src/extension.ts +++ b/extensions/positron-duckdb/src/extension.ts @@ -30,7 +30,7 @@ import { } from './interfaces'; import * as duckdb from '@duckdb/duckdb-wasm'; import Worker from 'web-worker'; -import { basename, extname, join } from 'path'; +import path, { basename, extname, join } from 'path'; import { Table } from 'apache-arrow'; import { pathToFileURL } from 'url'; @@ -138,24 +138,39 @@ export class DataExplorerRpcHandler { async openDataset(params: OpenDatasetParams): Promise { const tableName = `positron_${this._tableIndex++}`; - this._uriToTableName.set(params.uri, tableName); - const fileExt = extname(params.uri); + let uri = params.uri; - // console.log(`Opening ${params.uri}`); + console.log('[POSITRON] Opening dataset with params:', params); + + // On Windows, we need to fix up the path so that it is recognizable as a drive path. + if (process.platform === 'win32') { + const filePath = path.parse(params.uri); + console.log('[POSITRON] Parsed file path:', filePath); + // Example: {root: '/', dir: '/c:/Users/sharon/qa-example-content/data-files/flights', base: 'flights.parquet', ext: '.parquet', name: 'flights'} + if (filePath.root === '/' && filePath.dir.startsWith('/')) { + // Remove the leading slash from the path so the path is drive path + uri = uri.substring(1); + } + } + + this._uriToTableName.set(uri, tableName); + const fileExt = extname(uri); + + console.log(`[POSITRON] Opening ${uri}`); let scanOperation; switch (fileExt) { case '.parquet': case '.parq': - scanOperation = `parquet_scan('${params.uri}')`; + scanOperation = `parquet_scan('${uri}')`; break; // TODO: Will need to be able to pass CSV / TSV options from the // UI at some point. case '.csv': - scanOperation = `read_csv('${params.uri}')`; + scanOperation = `read_csv('${uri}')`; break; case '.tsv': - scanOperation = `read_csv('${params.uri}', delim='\t')`; + scanOperation = `read_csv('${uri}', delim='\t')`; break; default: return { error_message: `Unsupported file extension: ${fileExt}` }; @@ -176,7 +191,7 @@ export class DataExplorerRpcHandler { return { error_message: result }; } - this._uriToSchema.set(params.uri, result.toArray()); + this._uriToSchema.set(uri, result.toArray()); return {}; } From b87f0ea60aa3dd2279fbbeb64fc5c71055440d66 Mon Sep 17 00:00:00 2001 From: sharon wang <25834218+sharon-wang@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:41:14 -0400 Subject: [PATCH 4/5] apply uri fix in `_dispatchRpc` --- extensions/positron-duckdb/src/extension.ts | 55 ++++++++++--------- .../positronDataExplorerDuckDBBackend.ts | 3 +- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/extensions/positron-duckdb/src/extension.ts b/extensions/positron-duckdb/src/extension.ts index ecf0f33b92b..cc063387efe 100644 --- a/extensions/positron-duckdb/src/extension.ts +++ b/extensions/positron-duckdb/src/extension.ts @@ -136,28 +136,12 @@ export class DataExplorerRpcHandler { } } - async openDataset(params: OpenDatasetParams): Promise { + async openDataset(uri: string, params: OpenDatasetParams): Promise { const tableName = `positron_${this._tableIndex++}`; - let uri = params.uri; - - console.log('[POSITRON] Opening dataset with params:', params); - - // On Windows, we need to fix up the path so that it is recognizable as a drive path. - if (process.platform === 'win32') { - const filePath = path.parse(params.uri); - console.log('[POSITRON] Parsed file path:', filePath); - // Example: {root: '/', dir: '/c:/Users/sharon/qa-example-content/data-files/flights', base: 'flights.parquet', ext: '.parquet', name: 'flights'} - if (filePath.root === '/' && filePath.dir.startsWith('/')) { - // Remove the leading slash from the path so the path is drive path - uri = uri.substring(1); - } - } this._uriToTableName.set(uri, tableName); const fileExt = extname(uri); - console.log(`[POSITRON] Opening ${uri}`); - let scanOperation; switch (fileExt) { case '.parquet': @@ -475,6 +459,11 @@ export class DataExplorerRpcHandler { private async _getUnfilteredShape(uri: string) { const schema = this.getCachedSchema(uri); + + if (!schema) { + console.error('Schema not found for uri:', uri); + } + const numColumns = schema.length; const tableName = this.getTableName(uri); @@ -510,26 +499,38 @@ export class DataExplorerRpcHandler { } private async _dispatchRpc(rpc: DataExplorerRpc): RpcResponse { - if (rpc.method === DataExplorerBackendRequest.OpenDataset) { - return this.openDataset(rpc.params as OpenDatasetParams); - } - if (rpc.uri === undefined) { return `URI for open dataset must be provided: ${rpc.method} `; } + + let uri = rpc.uri; + + // On Windows, we need to fix up the path so that it is recognizable as a drive path. + // Not sure how reliable this is, but it seems to work for now. + if (process.platform === 'win32') { + const filePath = path.parse(uri); + // Example: {root: '/', dir: '/c:/Users/sharon/qa-example-content/data-files/flights', base: 'flights.parquet', ext: '.parquet', name: 'flights'} + if (filePath.root === '/' && filePath.dir.startsWith('/')) { + // Remove the leading slash from the path so the path is drive path + uri = uri.substring(1); + } + } + switch (rpc.method) { + case DataExplorerBackendRequest.OpenDataset: + return this.openDataset(uri, rpc.params as OpenDatasetParams); case DataExplorerBackendRequest.GetSchema: - return this.getSchema(rpc.uri, rpc.params as GetSchemaParams); + return this.getSchema(uri, rpc.params as GetSchemaParams); case DataExplorerBackendRequest.GetDataValues: - return this.getDataValues(rpc.uri, rpc.params as GetDataValuesParams); + return this.getDataValues(uri, rpc.params as GetDataValuesParams); case DataExplorerBackendRequest.GetRowLabels: - return this.getRowLabels(rpc.uri, rpc.params as GetRowLabelsParams); + return this.getRowLabels(uri, rpc.params as GetRowLabelsParams); case DataExplorerBackendRequest.GetState: - return this.getState(rpc.uri); + return this.getState(uri); case DataExplorerBackendRequest.SetRowFilters: - return this.setRowFilters(rpc.uri, rpc.params as SetRowFiltersParams); + return this.setRowFilters(uri, rpc.params as SetRowFiltersParams); case DataExplorerBackendRequest.GetColumnProfiles: - return this.getColumnProfiles(rpc.uri, rpc.params as GetColumnProfilesParams); + return this.getColumnProfiles(uri, rpc.params as GetColumnProfilesParams); case DataExplorerBackendRequest.ExportDataSelection: case DataExplorerBackendRequest.SetColumnFilters: case DataExplorerBackendRequest.SetSortColumns: diff --git a/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts b/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts index a5dffc9e96a..d15508f9f43 100644 --- a/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts +++ b/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts @@ -132,7 +132,8 @@ export class PositronDataExplorerDuckDBBackend extends Disposable implements IDa async openDataset() { const result = await this._execRpc({ method: DataExplorerBackendRequest.OpenDataset, - params: { uri: this.filePath } + uri: this.filePath, + params: {}, }); if (result.error_message) { From 8c6246cc4f06c4c1ab3286729f616d0f6fad6301 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 21 Oct 2024 18:39:11 -0500 Subject: [PATCH 5/5] Fix get_column_profiles, only modify URI in OpenDataset RPC handler --- extensions/positron-duckdb/src/extension.ts | 85 ++++++++++--------- .../positronDataExplorerDuckDBBackend.ts | 3 +- 2 files changed, 45 insertions(+), 43 deletions(-) diff --git a/extensions/positron-duckdb/src/extension.ts b/extensions/positron-duckdb/src/extension.ts index cc063387efe..9af712b6022 100644 --- a/extensions/positron-duckdb/src/extension.ts +++ b/extensions/positron-duckdb/src/extension.ts @@ -29,8 +29,8 @@ import { TableSchema } from './interfaces'; import * as duckdb from '@duckdb/duckdb-wasm'; +import * as path from 'path'; import Worker from 'web-worker'; -import path, { basename, extname, join } from 'path'; import { Table } from 'apache-arrow'; import { pathToFileURL } from 'url'; @@ -41,20 +41,19 @@ class DuckDBInstance { // Create the path to the DuckDB WASM bundle. Note that only the EH // bundle for Node is used for now as we don't support Positron // extensions running in a browser context yet. - const distPath = join(ctx.extensionPath, 'node_modules', '@duckdb', 'duckdb-wasm', 'dist'); + const distPath = path.join(ctx.extensionPath, 'node_modules', '@duckdb', 'duckdb-wasm', 'dist'); const bundle = { - mainModule: join(distPath, 'duckdb-eh.wasm'), - mainWorker: join(distPath, 'duckdb-node-eh.worker.cjs') + mainModule: path.join(distPath, 'duckdb-eh.wasm'), + mainWorker: path.join(distPath, 'duckdb-node-eh.worker.cjs') }; // On Windows, we need to call pathToFileURL on mainWorker because the web-worker package // does not support Windows paths that start with a drive letter. if (process.platform === 'win32') { - // Example: file:///c:/Users/sharon/positron/extensions/positron-duckdb/node_modules/@duckdb/duckdb-wasm/dist/duckdb-node-eh.worker.cjs bundle.mainWorker = pathToFileURL(bundle.mainWorker).toString(); } - const logger = new duckdb.VoidLogger(); + const logger = new duckdb.ConsoleLogger(); const worker = new Worker(bundle.mainWorker); @@ -90,6 +89,25 @@ const SENTINEL_NAN = 2; const SENTINEL_INF = 10; const SENTINEL_NEGINF = 10; +function uriToFilePath(uri: string) { + // On Windows, we need to fix up the path so that it is recognizable as a drive path. + // Not sure how reliable this is, but it seems to work for now. + if (process.platform === 'win32') { + const filePath = path.parse(uri); + // Example: { + // root: '/', + // dir: '/c:/Users/sharon/qa-example-content/data-files/flights', + // base: 'flights.parquet', ext: '.parquet', + // name: 'flights' + // } + if (filePath.root === '/' && filePath.dir.startsWith('/')) { + // Remove the leading slash from the path so the path is drive path + return uri.substring(1); + } + } + return uri; +} + /** * Implementation of Data Explorer backend protocol using duckdb-wasm, * for serving requests coming in through the vscode command. @@ -136,25 +154,27 @@ export class DataExplorerRpcHandler { } } - async openDataset(uri: string, params: OpenDatasetParams): Promise { + async openDataset(params: OpenDatasetParams): Promise { const tableName = `positron_${this._tableIndex++}`; - this._uriToTableName.set(uri, tableName); - const fileExt = extname(uri); + this._uriToTableName.set(params.uri, tableName); + + const filePath = uriToFilePath(params.uri); + const fileExt = path.extname(filePath); let scanOperation; switch (fileExt) { case '.parquet': case '.parq': - scanOperation = `parquet_scan('${uri}')`; + scanOperation = `parquet_scan('${filePath}')`; break; // TODO: Will need to be able to pass CSV / TSV options from the // UI at some point. case '.csv': - scanOperation = `read_csv('${uri}')`; + scanOperation = `read_csv('${filePath}')`; break; case '.tsv': - scanOperation = `read_csv('${uri}', delim='\t')`; + scanOperation = `read_csv('${filePath}', delim='\t')`; break; default: return { error_message: `Unsupported file extension: ${fileExt}` }; @@ -175,7 +195,7 @@ export class DataExplorerRpcHandler { return { error_message: result }; } - this._uriToSchema.set(uri, result.toArray()); + this._uriToSchema.set(params.uri, result.toArray()); return {}; } @@ -327,7 +347,7 @@ export class DataExplorerRpcHandler { async getState(uri: string): RpcResponse { const [num_rows, num_columns] = await this._getUnfilteredShape(uri); return { - display_name: basename(uri), + display_name: path.basename(uri), table_shape: { num_rows, num_columns }, table_unfiltered_shape: { num_rows, num_columns }, has_row_labels: false, @@ -459,11 +479,6 @@ export class DataExplorerRpcHandler { private async _getUnfilteredShape(uri: string) { const schema = this.getCachedSchema(uri); - - if (!schema) { - console.error('Schema not found for uri:', uri); - } - const numColumns = schema.length; const tableName = this.getTableName(uri); @@ -499,38 +514,26 @@ export class DataExplorerRpcHandler { } private async _dispatchRpc(rpc: DataExplorerRpc): RpcResponse { - if (rpc.uri === undefined) { - return `URI for open dataset must be provided: ${rpc.method} `; + if (rpc.method === DataExplorerBackendRequest.OpenDataset) { + return this.openDataset(rpc.params as OpenDatasetParams); } - let uri = rpc.uri; - - // On Windows, we need to fix up the path so that it is recognizable as a drive path. - // Not sure how reliable this is, but it seems to work for now. - if (process.platform === 'win32') { - const filePath = path.parse(uri); - // Example: {root: '/', dir: '/c:/Users/sharon/qa-example-content/data-files/flights', base: 'flights.parquet', ext: '.parquet', name: 'flights'} - if (filePath.root === '/' && filePath.dir.startsWith('/')) { - // Remove the leading slash from the path so the path is drive path - uri = uri.substring(1); - } + if (rpc.uri === undefined) { + return `URI for open dataset must be provided: ${rpc.method} `; } - switch (rpc.method) { - case DataExplorerBackendRequest.OpenDataset: - return this.openDataset(uri, rpc.params as OpenDatasetParams); case DataExplorerBackendRequest.GetSchema: - return this.getSchema(uri, rpc.params as GetSchemaParams); + return this.getSchema(rpc.uri, rpc.params as GetSchemaParams); case DataExplorerBackendRequest.GetDataValues: - return this.getDataValues(uri, rpc.params as GetDataValuesParams); + return this.getDataValues(rpc.uri, rpc.params as GetDataValuesParams); case DataExplorerBackendRequest.GetRowLabels: - return this.getRowLabels(uri, rpc.params as GetRowLabelsParams); + return this.getRowLabels(rpc.uri, rpc.params as GetRowLabelsParams); case DataExplorerBackendRequest.GetState: - return this.getState(uri); + return this.getState(rpc.uri); case DataExplorerBackendRequest.SetRowFilters: - return this.setRowFilters(uri, rpc.params as SetRowFiltersParams); + return this.setRowFilters(rpc.uri, rpc.params as SetRowFiltersParams); case DataExplorerBackendRequest.GetColumnProfiles: - return this.getColumnProfiles(uri, rpc.params as GetColumnProfilesParams); + return this.getColumnProfiles(rpc.uri, rpc.params as GetColumnProfilesParams); case DataExplorerBackendRequest.ExportDataSelection: case DataExplorerBackendRequest.SetColumnFilters: case DataExplorerBackendRequest.SetSortColumns: diff --git a/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts b/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts index d15508f9f43..a5dffc9e96a 100644 --- a/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts +++ b/src/vs/workbench/services/positronDataExplorer/common/positronDataExplorerDuckDBBackend.ts @@ -132,8 +132,7 @@ export class PositronDataExplorerDuckDBBackend extends Disposable implements IDa async openDataset() { const result = await this._execRpc({ method: DataExplorerBackendRequest.OpenDataset, - uri: this.filePath, - params: {}, + params: { uri: this.filePath } }); if (result.error_message) {