Merge from vscode a234f13c45b40a0929777cb440ee011b7549eed2 (#8911)

* Merge from vscode a234f13c45b40a0929777cb440ee011b7549eed2

* update distro

* fix layering

* update distro

* fix tests
This commit is contained in:
Anthony Dresser
2020-01-22 13:42:37 -08:00
committed by GitHub
parent 977111eb21
commit bd7aac8ee0
895 changed files with 24651 additions and 14520 deletions

View File

@@ -4,8 +4,6 @@
*--------------------------------------------------------------------------------------------*/
import * as iconv from 'iconv-lite';
import { isLinux, isMacintosh } from 'vs/base/common/platform';
import { exec } from 'child_process';
import { Readable, Writable } from 'stream';
import { VSBuffer } from 'vs/base/common/buffer';
@@ -24,9 +22,10 @@ export const UTF16be_BOM = [0xFE, 0xFF];
export const UTF16le_BOM = [0xFF, 0xFE];
export const UTF8_BOM = [0xEF, 0xBB, 0xBF];
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
const NO_GUESS_BUFFER_MAX_LEN = 512; // when not auto guessing the encoding, small number of bytes are enough
const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
const NO_ENCODING_GUESS_MIN_BYTES = 512; // when not auto guessing the encoding, small number of bytes are enough
const AUTO_ENCODING_GUESS_MIN_BYTES = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
const AUTO_ENCODING_GUESS_MAX_BYTES = 512 * 128; // set an upper limit for the number of bytes we pass on to jschardet
export interface IDecodeStreamOptions {
guessEncoding: boolean;
@@ -42,7 +41,7 @@ export interface IDecodeStreamResult {
export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {
if (!options.minBytesRequiredForDetection) {
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES;
}
return new Promise<IDecodeStreamResult>((resolve, reject) => {
@@ -212,7 +211,7 @@ const IGNORE_ENCODINGS = ['ascii', 'utf-16', 'utf-32'];
async function guessEncodingByBuffer(buffer: Buffer): Promise<string | null> {
const jschardet = await import('jschardet');
const guessed = jschardet.detect(buffer);
const guessed = jschardet.detect(buffer.slice(0, AUTO_ENCODING_GUESS_MAX_BYTES)); // ensure to limit buffer for guessing due to https://github.com/aadsm/jschardet/issues/53
if (!guessed || !guessed.encoding) {
return null;
}
@@ -353,87 +352,3 @@ export function detectEncodingFromBuffer({ buffer, bytesRead }: IReadResult, aut
return { seemsBinary, encoding };
}
// https://ss64.com/nt/chcp.html
const windowsTerminalEncodings = {
'437': 'cp437', // United States
'850': 'cp850', // Multilingual(Latin I)
'852': 'cp852', // Slavic(Latin II)
'855': 'cp855', // Cyrillic(Russian)
'857': 'cp857', // Turkish
'860': 'cp860', // Portuguese
'861': 'cp861', // Icelandic
'863': 'cp863', // Canadian - French
'865': 'cp865', // Nordic
'866': 'cp866', // Russian
'869': 'cp869', // Modern Greek
'936': 'cp936', // Simplified Chinese
'1252': 'cp1252' // West European Latin
};
export async function resolveTerminalEncoding(verbose?: boolean): Promise<string> {
let rawEncodingPromise: Promise<string>;
// Support a global environment variable to win over other mechanics
const cliEncodingEnv = process.env['VSCODE_CLI_ENCODING'];
if (cliEncodingEnv) {
if (verbose) {
console.log(`Found VSCODE_CLI_ENCODING variable: ${cliEncodingEnv}`);
}
rawEncodingPromise = Promise.resolve(cliEncodingEnv);
}
// Linux/Mac: use "locale charmap" command
else if (isLinux || isMacintosh) {
rawEncodingPromise = new Promise<string>(resolve => {
if (verbose) {
console.log('Running "locale charmap" to detect terminal encoding...');
}
exec('locale charmap', (err, stdout, stderr) => resolve(stdout));
});
}
// Windows: educated guess
else {
rawEncodingPromise = new Promise<string>(resolve => {
if (verbose) {
console.log('Running "chcp" to detect terminal encoding...');
}
exec('chcp', (err, stdout, stderr) => {
if (stdout) {
const windowsTerminalEncodingKeys = Object.keys(windowsTerminalEncodings) as Array<keyof typeof windowsTerminalEncodings>;
for (const key of windowsTerminalEncodingKeys) {
if (stdout.indexOf(key) >= 0) {
return resolve(windowsTerminalEncodings[key]);
}
}
}
return resolve(undefined);
});
});
}
const rawEncoding = await rawEncodingPromise;
if (verbose) {
console.log(`Detected raw terminal encoding: ${rawEncoding}`);
}
if (!rawEncoding || rawEncoding.toLowerCase() === 'utf-8' || rawEncoding.toLowerCase() === UTF8) {
return UTF8;
}
const iconvEncoding = toIconvLiteEncoding(rawEncoding);
if (iconv.encodingExists(iconvEncoding)) {
return iconvEncoding;
}
if (verbose) {
console.log('Unsupported terminal encoding, falling back to UTF-8.');
}
return UTF8;
}

View File

@@ -0,0 +1,98 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the Source EULA. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
/**
* This code is also used by standalone cli's. Avoid adding dependencies to keep the size of the cli small.
*/
import { exec } from 'child_process';
import * as os from 'os';
const windowsTerminalEncodings = {
'437': 'cp437', // United States
'850': 'cp850', // Multilingual(Latin I)
'852': 'cp852', // Slavic(Latin II)
'855': 'cp855', // Cyrillic(Russian)
'857': 'cp857', // Turkish
'860': 'cp860', // Portuguese
'861': 'cp861', // Icelandic
'863': 'cp863', // Canadian - French
'865': 'cp865', // Nordic
'866': 'cp866', // Russian
'869': 'cp869', // Modern Greek
'936': 'cp936', // Simplified Chinese
'1252': 'cp1252' // West European Latin
};
function toIconvLiteEncoding(encodingName: string): string {
const normalizedEncodingName = encodingName.replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
const mapped = JSCHARDET_TO_ICONV_ENCODINGS[normalizedEncodingName];
return mapped || normalizedEncodingName;
}
const JSCHARDET_TO_ICONV_ENCODINGS: { [name: string]: string } = {
'ibm866': 'cp866',
'big5': 'cp950'
};
const UTF8 = 'utf8';
export async function resolveTerminalEncoding(verbose?: boolean): Promise<string> {
let rawEncodingPromise: Promise<string>;
// Support a global environment variable to win over other mechanics
const cliEncodingEnv = process.env['VSCODE_CLI_ENCODING'];
if (cliEncodingEnv) {
if (verbose) {
console.log(`Found VSCODE_CLI_ENCODING variable: ${cliEncodingEnv}`);
}
rawEncodingPromise = Promise.resolve(cliEncodingEnv);
}
// Windows: educated guess
else if (os.platform() === 'win32') {
rawEncodingPromise = new Promise<string>(resolve => {
if (verbose) {
console.log('Running "chcp" to detect terminal encoding...');
}
exec('chcp', (err, stdout, stderr) => {
if (stdout) {
const windowsTerminalEncodingKeys = Object.keys(windowsTerminalEncodings) as Array<keyof typeof windowsTerminalEncodings>;
for (const key of windowsTerminalEncodingKeys) {
if (stdout.indexOf(key) >= 0) {
return resolve(windowsTerminalEncodings[key]);
}
}
}
return resolve(undefined);
});
});
}
// Linux/Mac: use "locale charmap" command
else {
rawEncodingPromise = new Promise<string>(resolve => {
if (verbose) {
console.log('Running "locale charmap" to detect terminal encoding...');
}
exec('locale charmap', (err, stdout, stderr) => resolve(stdout));
});
}
const rawEncoding = await rawEncodingPromise;
if (verbose) {
console.log(`Detected raw terminal encoding: ${rawEncoding}`);
}
if (!rawEncoding || rawEncoding.toLowerCase() === 'utf-8' || rawEncoding.toLowerCase() === UTF8) {
return UTF8;
}
return toIconvLiteEncoding(rawEncoding);
}