Merge from vscode 3a6dcb42008d509900b3a3b2d695564eeb4dbdac (#5098)

2026-02-03 09:35:40 -05:00 · 2019-04-17 23:38:44 -07:00
parent 1fec26c6b3
commit b852f032d3
63 changed files with 676 additions and 413 deletions
--- a/src/vs/base/node/encoding.ts
+++ b/src/vs/base/node/encoding.ts
@@ -18,15 +18,20 @@ export const UTF16be_BOM = [0xFE, 0xFF];
 export const UTF16le_BOM = [0xFF, 0xFE];
 export const UTF8_BOM = [0xEF, 0xBB, 0xBF];

+const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
+const NO_GUESS_BUFFER_MAX_LEN = 512; 			// when not auto guessing the encoding, a small number of bytes is enough
+const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; 		// with auto guessing we want a lot more content to be read for guessing
+
 export interface IDecodeStreamOptions {
-	guessEncoding?: boolean;
+	guessEncoding: boolean;
 	minBytesRequiredForDetection?: number;
-	overwriteEncoding?(detectedEncoding: string | null): string;
+
+	overwriteEncoding(detectedEncoding: string | null): string;
 }

 export interface IDecodeStreamResult {
-	detected: IDetectedEncodingResult;
 	stream: NodeJS.ReadableStream;
+	detected: IDetectedEncodingResult;
 }

 export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {
@@ -34,78 +39,82 @@ export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions
 		options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
 	}

-	if (!options.overwriteEncoding) {
-		options.overwriteEncoding = detected => detected || UTF8;
-	}
-
 	return new Promise<IDecodeStreamResult>((resolve, reject) => {
 		const writer = new class extends Writable {
 			private decodeStream: NodeJS.ReadWriteStream;
-			private decodeStreamConstruction: Promise<void>;
-			private buffer: Buffer[] = [];
+			private decodeStreamPromise: Promise<void>;
+
+			private bufferedChunks: Buffer[] = [];
 			private bytesBuffered = 0;

-			_write(chunk: any, encoding: string, callback: Function): void {
+			_write(chunk: Buffer, encoding: string, callback: (error: Error | null) => void): void {
 				if (!Buffer.isBuffer(chunk)) {
-					callback(new Error('data must be a buffer'));
+					return callback(new Error('toDecodeStream(): data must be a buffer'));
 				}

+				// if the decode stream is ready, we just write directly
 				if (this.decodeStream) {
-					this.decodeStream.write(chunk, callback); // just a forwarder now
+					this.decodeStream.write(chunk, callback);

 					return;
 				}

-				this.buffer.push(chunk);
-				this.bytesBuffered += chunk.length;
+				// otherwise we need to buffer the data until the stream is ready
+				this.bufferedChunks.push(chunk);
+				this.bytesBuffered += chunk.byteLength;

 				// waiting for the decoder to be ready
-				if (this.decodeStreamConstruction) {
-					this.decodeStreamConstruction.then(() => callback(), err => callback(err));
+				if (this.decodeStreamPromise) {
+					this.decodeStreamPromise.then(() => callback(null), error => callback(error));
 				}

-				// buffered enough data, create stream and forward data
+				// buffered enough data for encoding detection, create stream and forward data
 				else if (typeof options.minBytesRequiredForDetection === 'number' && this.bytesBuffered >= options.minBytesRequiredForDetection) {
 					this._startDecodeStream(callback);
 				}

-				// only buffering
+				// only buffering until enough data for encoding detection is there
 				else {
-					callback();
+					callback(null);
 				}
 			}

-			_startDecodeStream(callback: Function): void {
-				this.decodeStreamConstruction = Promise.resolve(detectEncodingFromBuffer({
-					buffer: Buffer.concat(this.buffer),
+			_startDecodeStream(callback: (error: Error | null) => void): void {
+
+				// detect encoding from buffer
+				this.decodeStreamPromise = Promise.resolve(detectEncodingFromBuffer({
+					buffer: Buffer.concat(this.bufferedChunks),
 					bytesRead: this.bytesBuffered
 				}, options.guessEncoding)).then(detected => {
-					if (options.overwriteEncoding) {
-						detected.encoding = options.overwriteEncoding(detected.encoding);
-					}

+					// let the caller override the detected encoding
+					detected.encoding = options.overwriteEncoding(detected.encoding);
+
+					// decode and write buffer
 					this.decodeStream = decodeStream(detected.encoding);
+					this.decodeStream.write(Buffer.concat(this.bufferedChunks), callback);
+					this.bufferedChunks.length = 0;

-					for (const buffer of this.buffer) {
-						this.decodeStream.write(buffer);
-					}
-
-					callback();
+					// signal to the outside our detected encoding
+					// and final decoder stream
 					resolve({ detected, stream: this.decodeStream });
-				}, err => {
-					this.emit('error', err);
-					callback(err);
+				}, error => {
+					this.emit('error', error);
+
+					callback(error);
 				});
 			}

-			_final(callback: (err?: any) => any) {
+			_final(callback: (error: Error | null) => void) {

 				// normal finish
 				if (this.decodeStream) {
 					this.decodeStream.end(callback);
 				}

-				// we were still waiting for data...
+				// we were still waiting for data to do the encoding
+				// detection. thus, wrap up starting the stream even
+				// without all the data to get things going
 				else {
 					this._startDecodeStream(() => this.decodeStream.end(callback));
 				}
@@ -149,7 +158,7 @@ function toNodeEncoding(enc: string | null): string {
 }

 export function detectEncodingByBOMFromBuffer(buffer: Buffer | VSBuffer | null, bytesRead: number): string | null {
-	if (!buffer || bytesRead < 2) {
+	if (!buffer || bytesRead < UTF16be_BOM.length) {
 		return null;
 	}

@@ -166,7 +175,7 @@ export function detectEncodingByBOMFromBuffer(buffer: Buffer | VSBuffer | null,
 		return UTF16le;
 	}

-	if (bytesRead < 3) {
+	if (bytesRead < UTF8_BOM.length) {
 		return null;
 	}

@@ -256,10 +265,6 @@ export function toCanonicalName(enc: string): string {
 	}
 }

-const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
-const NO_GUESS_BUFFER_MAX_LEN = 512; 			// when not auto guessing the encoding, small number of bytes are enough
-const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; 		// with auto guessing we want a lot more content to be read for guessing
-
 export interface IDetectedEncodingResult {
 	encoding: string | null;
 	seemsBinary: boolean;