Merge from vscode a234f13c45b40a0929777cb440ee011b7549eed2 (#8911)

* Merge from vscode a234f13c45b40a0929777cb440ee011b7549eed2
* update distro
* fix layering
* update distro
* fix tests
2026-02-01 01:25:38 -05:00 · 2020-01-22 13:42:37 -08:00
parent 977111eb21
commit bd7aac8ee0
895 changed files with 24651 additions and 14520 deletions
--- a/src/vs/base/node/encoding.ts
+++ b/src/vs/base/node/encoding.ts
@@ -4,8 +4,6 @@
 *--------------------------------------------------------------------------------------------*/

 import * as iconv from 'iconv-lite';
-import { isLinux, isMacintosh } from 'vs/base/common/platform';
-import { exec } from 'child_process';
 import { Readable, Writable } from 'stream';
 import { VSBuffer } from 'vs/base/common/buffer';

@@ -24,9 +22,10 @@ export const UTF16be_BOM = [0xFE, 0xFF];
 export const UTF16le_BOM = [0xFF, 0xFE];
 export const UTF8_BOM = [0xEF, 0xBB, 0xBF];

-const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
-const NO_GUESS_BUFFER_MAX_LEN = 512; 			// when not auto guessing the encoding, small number of bytes are enough
-const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; 		// with auto guessing we want a lot more content to be read for guessing
+const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; 	// number of bytes to look at to decide whether a file is binary or not
+const NO_ENCODING_GUESS_MIN_BYTES = 512; 			// when not auto guessing the encoding, a small number of bytes is enough
+const AUTO_ENCODING_GUESS_MIN_BYTES = 512 * 8; 		// with auto guessing we want a lot more content to be read for guessing
+const AUTO_ENCODING_GUESS_MAX_BYTES = 512 * 128; 	// upper limit for the number of bytes we pass on to jschardet (see https://github.com/aadsm/jschardet/issues/53)

 export interface IDecodeStreamOptions {
 	guessEncoding: boolean;
@@ -42,7 +41,7 @@ export interface IDecodeStreamResult {

 export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {
 	if (!options.minBytesRequiredForDetection) {
-		options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
+		options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES;
 	}

 	return new Promise<IDecodeStreamResult>((resolve, reject) => {
@@ -212,7 +211,7 @@ const IGNORE_ENCODINGS = ['ascii', 'utf-16', 'utf-32'];
 async function guessEncodingByBuffer(buffer: Buffer): Promise<string | null> {
 	const jschardet = await import('jschardet');

-	const guessed = jschardet.detect(buffer);
+	const guessed = jschardet.detect(buffer.slice(0, AUTO_ENCODING_GUESS_MAX_BYTES)); // ensure to limit buffer for guessing due to https://github.com/aadsm/jschardet/issues/53
 	if (!guessed || !guessed.encoding) {
 		return null;
 	}
@@ -353,87 +352,3 @@ export function detectEncodingFromBuffer({ buffer, bytesRead }: IReadResult, aut

 	return { seemsBinary, encoding };
 }
-
-// https://ss64.com/nt/chcp.html
-const windowsTerminalEncodings = {
-	'437': 'cp437', // United States
-	'850': 'cp850', // Multilingual(Latin I)
-	'852': 'cp852', // Slavic(Latin II)
-	'855': 'cp855', // Cyrillic(Russian)
-	'857': 'cp857', // Turkish
-	'860': 'cp860', // Portuguese
-	'861': 'cp861', // Icelandic
-	'863': 'cp863', // Canadian - French
-	'865': 'cp865', // Nordic
-	'866': 'cp866', // Russian
-	'869': 'cp869', // Modern Greek
-	'936': 'cp936', // Simplified Chinese
-	'1252': 'cp1252' // West European Latin
-};
-
-export async function resolveTerminalEncoding(verbose?: boolean): Promise<string> {
-	let rawEncodingPromise: Promise<string>;
-
-	// Support a global environment variable to win over other mechanics
-	const cliEncodingEnv = process.env['VSCODE_CLI_ENCODING'];
-	if (cliEncodingEnv) {
-		if (verbose) {
-			console.log(`Found VSCODE_CLI_ENCODING variable: ${cliEncodingEnv}`);
-		}
-
-		rawEncodingPromise = Promise.resolve(cliEncodingEnv);
-	}
-
-	// Linux/Mac: use "locale charmap" command
-	else if (isLinux || isMacintosh) {
-		rawEncodingPromise = new Promise<string>(resolve => {
-			if (verbose) {
-				console.log('Running "locale charmap" to detect terminal encoding...');
-			}
-
-			exec('locale charmap', (err, stdout, stderr) => resolve(stdout));
-		});
-	}
-
-	// Windows: educated guess
-	else {
-		rawEncodingPromise = new Promise<string>(resolve => {
-			if (verbose) {
-				console.log('Running "chcp" to detect terminal encoding...');
-			}
-
-			exec('chcp', (err, stdout, stderr) => {
-				if (stdout) {
-					const windowsTerminalEncodingKeys = Object.keys(windowsTerminalEncodings) as Array<keyof typeof windowsTerminalEncodings>;
-					for (const key of windowsTerminalEncodingKeys) {
-						if (stdout.indexOf(key) >= 0) {
-							return resolve(windowsTerminalEncodings[key]);
-						}
-					}
-				}
-
-				return resolve(undefined);
-			});
-		});
-	}
-
-	const rawEncoding = await rawEncodingPromise;
-	if (verbose) {
-		console.log(`Detected raw terminal encoding: ${rawEncoding}`);
-	}
-
-	if (!rawEncoding || rawEncoding.toLowerCase() === 'utf-8' || rawEncoding.toLowerCase() === UTF8) {
-		return UTF8;
-	}
-
-	const iconvEncoding = toIconvLiteEncoding(rawEncoding);
-	if (iconv.encodingExists(iconvEncoding)) {
-		return iconvEncoding;
-	}
-
-	if (verbose) {
-		console.log('Unsupported terminal encoding, falling back to UTF-8.');
-	}
-
-	return UTF8;
-}
--- a/src/vs/base/node/terminalEncoding.ts
+++ b/src/vs/base/node/terminalEncoding.ts
@@ -0,0 +1,98 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the Source EULA. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * This code is also used by standalone cli's. Avoid adding dependencies to keep the size of the cli small.
+ */
+import { exec } from 'child_process';
+import * as os from 'os';
+
+const windowsTerminalEncodings = { // code page number as printed by "chcp" -> encoding name, see https://ss64.com/nt/chcp.html
+	'437': 'cp437', // United States
+	'850': 'cp850', // Multilingual (Latin I)
+	'852': 'cp852', // Slavic (Latin II)
+	'855': 'cp855', // Cyrillic (Russian)
+	'857': 'cp857', // Turkish
+	'860': 'cp860', // Portuguese
+	'861': 'cp861', // Icelandic
+	'863': 'cp863', // Canadian - French
+	'865': 'cp865', // Nordic
+	'866': 'cp866', // Russian
+	'869': 'cp869', // Modern Greek
+	'936': 'cp936', // Simplified Chinese
+	'1252': 'cp1252' // West European Latin
+};
+
+/**
+ * Normalizes an encoding name and translates it through
+ * `JSCHARDET_TO_ICONV_ENCODINGS`.
+ *
+ * Normalization removes every character that is not an ASCII letter or digit
+ * and lower-cases the remainder, so `UTF-8` becomes `utf8`.
+ *
+ * @param encodingName raw encoding name
+ * @returns the translated name if the table has an entry for the normalized
+ * name, otherwise the normalized name itself (which may be the empty string)
+ */
+function toIconvLiteEncoding(encodingName: string): string {
+	const normalizedEncodingName = encodingName.replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
+
+	// Consult own keys only: a plain index access also finds members inherited
+	// from Object.prototype, so the name `constructor` would otherwise be
+	// answered with a function instead of a string.
+	if (Object.prototype.hasOwnProperty.call(JSCHARDET_TO_ICONV_ENCODINGS, normalizedEncodingName)) {
+		return JSCHARDET_TO_ICONV_ENCODINGS[normalizedEncodingName] || normalizedEncodingName;
+	}
+
+	return normalizedEncodingName;
+}
+
+const JSCHARDET_TO_ICONV_ENCODINGS: { [name: string]: string } = { // normalized encoding name -> name that toIconvLiteEncoding returns instead
+	'ibm866': 'cp866',
+	'big5': 'cp950'
+};
+
+const UTF8 = 'utf8';
+
+
+/**
+ * Resolves the encoding of the terminal this process runs in.
+ *
+ * Sources, in order of precedence:
+ *  1. the `VSCODE_CLI_ENCODING` environment variable,
+ *  2. on Windows (`os.platform() === 'win32'`): the code page printed by
+ *     `chcp`, translated through `windowsTerminalEncodings`,
+ *  3. on every other platform: the output of `locale charmap`.
+ *
+ * Errors of the spawned commands are ignored on purpose (best effort); when no
+ * encoding can be determined the result is `utf8`.
+ *
+ * @param verbose when set, each detection step is reported via `console.log`
+ * @returns the detected name as normalized by `toIconvLiteEncoding`, or `utf8`
+ */
+export async function resolveTerminalEncoding(verbose?: boolean): Promise<string> {
+	let rawEncodingPromise: Promise<string | undefined>;
+
+	// Support a global environment variable to win over other mechanics
+	const cliEncodingEnv = process.env['VSCODE_CLI_ENCODING'];
+	if (cliEncodingEnv) {
+		if (verbose) {
+			console.log(`Found VSCODE_CLI_ENCODING variable: ${cliEncodingEnv}`);
+		}
+
+		rawEncodingPromise = Promise.resolve(cliEncodingEnv);
+	}
+
+	// Windows: educated guess
+	else if (os.platform() === 'win32') {
+		rawEncodingPromise = new Promise<string | undefined>(resolve => {
+			if (verbose) {
+				console.log('Running "chcp" to detect terminal encoding...');
+			}
+
+			exec('chcp', (err, stdout, stderr) => {
+				// Compare whole numbers only. A substring search would mistake
+				// e.g. code page 20866 for 866 or 54936 for 936.
+				const codePages: string[] = (stdout && stdout.match(/\d+/g)) || [];
+				for (const codePage of codePages) {
+					if (Object.prototype.hasOwnProperty.call(windowsTerminalEncodings, codePage)) {
+						return resolve(windowsTerminalEncodings[codePage as keyof typeof windowsTerminalEncodings]);
+					}
+				}
+
+				// unknown code page or no output at all
+				return resolve(undefined);
+			});
+		});
+	}
+
+	// Linux/Mac (any non-Windows platform): use "locale charmap" command
+	else {
+		rawEncodingPromise = new Promise<string | undefined>(resolve => {
+			if (verbose) {
+				console.log('Running "locale charmap" to detect terminal encoding...');
+			}
+
+			// on failure stdout is empty, which falls back to UTF-8 below
+			exec('locale charmap', (err, stdout, stderr) => resolve(stdout));
+		});
+	}
+
+	const rawEncoding = await rawEncodingPromise;
+	if (verbose) {
+		console.log(`Detected raw terminal encoding: ${rawEncoding}`);
+	}
+
+	if (!rawEncoding || rawEncoding.toLowerCase() === 'utf-8' || rawEncoding.toLowerCase() === UTF8) {
+		return UTF8;
+	}
+
+	// A raw value without any letter or digit (e.g. only whitespace) normalizes
+	// to the empty string; never hand that out as an encoding name.
+	return toIconvLiteEncoding(rawEncoding) || UTF8;
+}