mirror of
https://github.com/ckaczor/azuredatastudio.git
synced 2026-02-03 09:35:40 -05:00
Merge from vscode 3a6dcb42008d509900b3a3b2d695564eeb4dbdac (#5098)
This commit is contained in:
@@ -18,15 +18,20 @@ export const UTF16be_BOM = [0xFE, 0xFF];
|
||||
export const UTF16le_BOM = [0xFF, 0xFE]; // byte order mark (BOM) bytes for UTF-16 little-endian
|
||||
export const UTF8_BOM = [0xEF, 0xBB, 0xBF]; // byte order mark (BOM) bytes for UTF-8
|
||||
|
||||
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to inspect when deciding whether a file is binary
|
||||
const NO_GUESS_BUFFER_MAX_LEN = 512; // when not auto-guessing the encoding, a small number of bytes is enough
|
||||
const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; // when auto-guessing the encoding, read much more content to base the guess on
|
||||
|
||||
export interface IDecodeStreamOptions {
|
||||
guessEncoding?: boolean;
|
||||
guessEncoding: boolean;
|
||||
minBytesRequiredForDetection?: number;
|
||||
overwriteEncoding?(detectedEncoding: string | null): string;
|
||||
|
||||
overwriteEncoding(detectedEncoding: string | null): string;
|
||||
}
|
||||
|
||||
export interface IDecodeStreamResult {
|
||||
detected: IDetectedEncodingResult;
|
||||
stream: NodeJS.ReadableStream;
|
||||
detected: IDetectedEncodingResult;
|
||||
}
|
||||
|
||||
export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {
|
||||
@@ -34,78 +39,82 @@ export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions
|
||||
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
|
||||
}
|
||||
|
||||
if (!options.overwriteEncoding) {
|
||||
options.overwriteEncoding = detected => detected || UTF8;
|
||||
}
|
||||
|
||||
return new Promise<IDecodeStreamResult>((resolve, reject) => {
|
||||
const writer = new class extends Writable {
|
||||
private decodeStream: NodeJS.ReadWriteStream;
|
||||
private decodeStreamConstruction: Promise<void>;
|
||||
private buffer: Buffer[] = [];
|
||||
private decodeStreamPromise: Promise<void>;
|
||||
|
||||
private bufferedChunks: Buffer[] = [];
|
||||
private bytesBuffered = 0;
|
||||
|
||||
_write(chunk: any, encoding: string, callback: Function): void {
|
||||
_write(chunk: Buffer, encoding: string, callback: (error: Error | null) => void): void {
|
||||
if (!Buffer.isBuffer(chunk)) {
|
||||
callback(new Error('data must be a buffer'));
|
||||
return callback(new Error('toDecodeStream(): data must be a buffer'));
|
||||
}
|
||||
|
||||
// if the decode stream is ready, we just write directly
|
||||
if (this.decodeStream) {
|
||||
this.decodeStream.write(chunk, callback); // just a forwarder now
|
||||
this.decodeStream.write(chunk, callback);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
this.buffer.push(chunk);
|
||||
this.bytesBuffered += chunk.length;
|
||||
// otherwise we need to buffer the data until the stream is ready
|
||||
this.bufferedChunks.push(chunk);
|
||||
this.bytesBuffered += chunk.byteLength;
|
||||
|
||||
// waiting for the decoder to be ready
|
||||
if (this.decodeStreamConstruction) {
|
||||
this.decodeStreamConstruction.then(() => callback(), err => callback(err));
|
||||
if (this.decodeStreamPromise) {
|
||||
this.decodeStreamPromise.then(() => callback(null), error => callback(error));
|
||||
}
|
||||
|
||||
// buffered enough data, create stream and forward data
|
||||
// buffered enough data for encoding detection, create stream and forward data
|
||||
else if (typeof options.minBytesRequiredForDetection === 'number' && this.bytesBuffered >= options.minBytesRequiredForDetection) {
|
||||
this._startDecodeStream(callback);
|
||||
}
|
||||
|
||||
// only buffering
|
||||
// only buffering until enough data for encoding detection is there
|
||||
else {
|
||||
callback();
|
||||
callback(null);
|
||||
}
|
||||
}
|
||||
|
||||
_startDecodeStream(callback: Function): void {
|
||||
this.decodeStreamConstruction = Promise.resolve(detectEncodingFromBuffer({
|
||||
buffer: Buffer.concat(this.buffer),
|
||||
_startDecodeStream(callback: (error: Error | null) => void): void {
|
||||
|
||||
// detect encoding from buffer
|
||||
this.decodeStreamPromise = Promise.resolve(detectEncodingFromBuffer({
|
||||
buffer: Buffer.concat(this.bufferedChunks),
|
||||
bytesRead: this.bytesBuffered
|
||||
}, options.guessEncoding)).then(detected => {
|
||||
if (options.overwriteEncoding) {
|
||||
detected.encoding = options.overwriteEncoding(detected.encoding);
|
||||
}
|
||||
|
||||
// ensure to respect overwrite of encoding
|
||||
detected.encoding = options.overwriteEncoding(detected.encoding);
|
||||
|
||||
// decode and write buffer
|
||||
this.decodeStream = decodeStream(detected.encoding);
|
||||
this.decodeStream.write(Buffer.concat(this.bufferedChunks), callback);
|
||||
this.bufferedChunks.length = 0;
|
||||
|
||||
for (const buffer of this.buffer) {
|
||||
this.decodeStream.write(buffer);
|
||||
}
|
||||
|
||||
callback();
|
||||
// signal to the outside our detected encoding
|
||||
// and final decoder stream
|
||||
resolve({ detected, stream: this.decodeStream });
|
||||
}, err => {
|
||||
this.emit('error', err);
|
||||
callback(err);
|
||||
}, error => {
|
||||
this.emit('error', error);
|
||||
|
||||
callback(error);
|
||||
});
|
||||
}
|
||||
|
||||
_final(callback: (err?: any) => any) {
|
||||
_final(callback: (error: Error | null) => void) {
|
||||
|
||||
// normal finish
|
||||
if (this.decodeStream) {
|
||||
this.decodeStream.end(callback);
|
||||
}
|
||||
|
||||
// we were still waiting for data...
|
||||
// we were still waiting for data to do the encoding
|
||||
// detection. thus, wrap up starting the stream even
|
||||
// without all the data to get things going
|
||||
else {
|
||||
this._startDecodeStream(() => this.decodeStream.end(callback));
|
||||
}
|
||||
@@ -149,7 +158,7 @@ function toNodeEncoding(enc: string | null): string {
|
||||
}
|
||||
|
||||
export function detectEncodingByBOMFromBuffer(buffer: Buffer | VSBuffer | null, bytesRead: number): string | null {
|
||||
if (!buffer || bytesRead < 2) {
|
||||
if (!buffer || bytesRead < UTF16be_BOM.length) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -166,7 +175,7 @@ export function detectEncodingByBOMFromBuffer(buffer: Buffer | VSBuffer | null,
|
||||
return UTF16le;
|
||||
}
|
||||
|
||||
if (bytesRead < 3) {
|
||||
if (bytesRead < UTF8_BOM.length) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -256,10 +265,6 @@ export function toCanonicalName(enc: string): string {
|
||||
}
|
||||
}
|
||||
|
||||
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to inspect when deciding whether a file is binary
|
||||
const NO_GUESS_BUFFER_MAX_LEN = 512; // when not auto-guessing the encoding, a small number of bytes is enough
|
||||
const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; // when auto-guessing the encoding, read much more content to base the guess on
|
||||
|
||||
export interface IDetectedEncodingResult {
|
||||
encoding: string | null;
|
||||
seemsBinary: boolean;
|
||||
|
||||
Reference in New Issue
Block a user