mirror of
https://github.com/ckaczor/azuredatastudio.git
synced 2026-02-01 01:25:38 -05:00
Merge from vscode a234f13c45b40a0929777cb440ee011b7549eed2 (#8911)
* Merge from vscode a234f13c45b40a0929777cb440ee011b7549eed2 * update distro * fix layering * update distro * fix tests
This commit is contained in:
@@ -4,8 +4,6 @@
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import * as iconv from 'iconv-lite';
|
||||
import { isLinux, isMacintosh } from 'vs/base/common/platform';
|
||||
import { exec } from 'child_process';
|
||||
import { Readable, Writable } from 'stream';
|
||||
import { VSBuffer } from 'vs/base/common/buffer';
|
||||
|
||||
@@ -24,9 +22,10 @@ export const UTF16be_BOM = [0xFE, 0xFF];
|
||||
export const UTF16le_BOM = [0xFF, 0xFE];
|
||||
export const UTF8_BOM = [0xEF, 0xBB, 0xBF];
|
||||
|
||||
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
|
||||
const NO_GUESS_BUFFER_MAX_LEN = 512; // when not auto guessing the encoding, small number of bytes are enough
|
||||
const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
|
||||
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
|
||||
const NO_ENCODING_GUESS_MIN_BYTES = 512; // when not auto guessing the encoding, small number of bytes are enough
|
||||
const AUTO_ENCODING_GUESS_MIN_BYTES = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
|
||||
const AUTO_ENCODING_GUESS_MAX_BYTES = 512 * 128; // set an upper limit for the number of bytes we pass on to jschardet
|
||||
|
||||
export interface IDecodeStreamOptions {
|
||||
guessEncoding: boolean;
|
||||
@@ -42,7 +41,7 @@ export interface IDecodeStreamResult {
|
||||
|
||||
export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {
|
||||
if (!options.minBytesRequiredForDetection) {
|
||||
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
|
||||
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES;
|
||||
}
|
||||
|
||||
return new Promise<IDecodeStreamResult>((resolve, reject) => {
|
||||
@@ -212,7 +211,7 @@ const IGNORE_ENCODINGS = ['ascii', 'utf-16', 'utf-32'];
|
||||
async function guessEncodingByBuffer(buffer: Buffer): Promise<string | null> {
|
||||
const jschardet = await import('jschardet');
|
||||
|
||||
const guessed = jschardet.detect(buffer);
|
||||
const guessed = jschardet.detect(buffer.slice(0, AUTO_ENCODING_GUESS_MAX_BYTES)); // ensure to limit buffer for guessing due to https://github.com/aadsm/jschardet/issues/53
|
||||
if (!guessed || !guessed.encoding) {
|
||||
return null;
|
||||
}
|
||||
@@ -353,87 +352,3 @@ export function detectEncodingFromBuffer({ buffer, bytesRead }: IReadResult, aut
|
||||
|
||||
return { seemsBinary, encoding };
|
||||
}
|
||||
|
||||
// https://ss64.com/nt/chcp.html
|
||||
const windowsTerminalEncodings = {
|
||||
'437': 'cp437', // United States
|
||||
'850': 'cp850', // Multilingual(Latin I)
|
||||
'852': 'cp852', // Slavic(Latin II)
|
||||
'855': 'cp855', // Cyrillic(Russian)
|
||||
'857': 'cp857', // Turkish
|
||||
'860': 'cp860', // Portuguese
|
||||
'861': 'cp861', // Icelandic
|
||||
'863': 'cp863', // Canadian - French
|
||||
'865': 'cp865', // Nordic
|
||||
'866': 'cp866', // Russian
|
||||
'869': 'cp869', // Modern Greek
|
||||
'936': 'cp936', // Simplified Chinese
|
||||
'1252': 'cp1252' // West European Latin
|
||||
};
|
||||
|
||||
export async function resolveTerminalEncoding(verbose?: boolean): Promise<string> {
|
||||
let rawEncodingPromise: Promise<string>;
|
||||
|
||||
// Support a global environment variable to win over other mechanics
|
||||
const cliEncodingEnv = process.env['VSCODE_CLI_ENCODING'];
|
||||
if (cliEncodingEnv) {
|
||||
if (verbose) {
|
||||
console.log(`Found VSCODE_CLI_ENCODING variable: ${cliEncodingEnv}`);
|
||||
}
|
||||
|
||||
rawEncodingPromise = Promise.resolve(cliEncodingEnv);
|
||||
}
|
||||
|
||||
// Linux/Mac: use "locale charmap" command
|
||||
else if (isLinux || isMacintosh) {
|
||||
rawEncodingPromise = new Promise<string>(resolve => {
|
||||
if (verbose) {
|
||||
console.log('Running "locale charmap" to detect terminal encoding...');
|
||||
}
|
||||
|
||||
exec('locale charmap', (err, stdout, stderr) => resolve(stdout));
|
||||
});
|
||||
}
|
||||
|
||||
// Windows: educated guess
|
||||
else {
|
||||
rawEncodingPromise = new Promise<string>(resolve => {
|
||||
if (verbose) {
|
||||
console.log('Running "chcp" to detect terminal encoding...');
|
||||
}
|
||||
|
||||
exec('chcp', (err, stdout, stderr) => {
|
||||
if (stdout) {
|
||||
const windowsTerminalEncodingKeys = Object.keys(windowsTerminalEncodings) as Array<keyof typeof windowsTerminalEncodings>;
|
||||
for (const key of windowsTerminalEncodingKeys) {
|
||||
if (stdout.indexOf(key) >= 0) {
|
||||
return resolve(windowsTerminalEncodings[key]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return resolve(undefined);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
const rawEncoding = await rawEncodingPromise;
|
||||
if (verbose) {
|
||||
console.log(`Detected raw terminal encoding: ${rawEncoding}`);
|
||||
}
|
||||
|
||||
if (!rawEncoding || rawEncoding.toLowerCase() === 'utf-8' || rawEncoding.toLowerCase() === UTF8) {
|
||||
return UTF8;
|
||||
}
|
||||
|
||||
const iconvEncoding = toIconvLiteEncoding(rawEncoding);
|
||||
if (iconv.encodingExists(iconvEncoding)) {
|
||||
return iconvEncoding;
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
console.log('Unsupported terminal encoding, falling back to UTF-8.');
|
||||
}
|
||||
|
||||
return UTF8;
|
||||
}
|
||||
|
||||
98
src/vs/base/node/terminalEncoding.ts
Normal file
98
src/vs/base/node/terminalEncoding.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the Source EULA. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
/**
|
||||
* This code is also used by standalone cli's. Avoid adding dependencies to keep the size of the cli small.
|
||||
*/
|
||||
import { exec } from 'child_process';
|
||||
import * as os from 'os';
|
||||
|
||||
const windowsTerminalEncodings = {
|
||||
'437': 'cp437', // United States
|
||||
'850': 'cp850', // Multilingual(Latin I)
|
||||
'852': 'cp852', // Slavic(Latin II)
|
||||
'855': 'cp855', // Cyrillic(Russian)
|
||||
'857': 'cp857', // Turkish
|
||||
'860': 'cp860', // Portuguese
|
||||
'861': 'cp861', // Icelandic
|
||||
'863': 'cp863', // Canadian - French
|
||||
'865': 'cp865', // Nordic
|
||||
'866': 'cp866', // Russian
|
||||
'869': 'cp869', // Modern Greek
|
||||
'936': 'cp936', // Simplified Chinese
|
||||
'1252': 'cp1252' // West European Latin
|
||||
};
|
||||
|
||||
function toIconvLiteEncoding(encodingName: string): string {
|
||||
const normalizedEncodingName = encodingName.replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
|
||||
const mapped = JSCHARDET_TO_ICONV_ENCODINGS[normalizedEncodingName];
|
||||
|
||||
return mapped || normalizedEncodingName;
|
||||
}
|
||||
|
||||
const JSCHARDET_TO_ICONV_ENCODINGS: { [name: string]: string } = {
|
||||
'ibm866': 'cp866',
|
||||
'big5': 'cp950'
|
||||
};
|
||||
|
||||
const UTF8 = 'utf8';
|
||||
|
||||
|
||||
export async function resolveTerminalEncoding(verbose?: boolean): Promise<string> {
|
||||
let rawEncodingPromise: Promise<string>;
|
||||
|
||||
// Support a global environment variable to win over other mechanics
|
||||
const cliEncodingEnv = process.env['VSCODE_CLI_ENCODING'];
|
||||
if (cliEncodingEnv) {
|
||||
if (verbose) {
|
||||
console.log(`Found VSCODE_CLI_ENCODING variable: ${cliEncodingEnv}`);
|
||||
}
|
||||
|
||||
rawEncodingPromise = Promise.resolve(cliEncodingEnv);
|
||||
}
|
||||
|
||||
// Windows: educated guess
|
||||
else if (os.platform() === 'win32') {
|
||||
rawEncodingPromise = new Promise<string>(resolve => {
|
||||
if (verbose) {
|
||||
console.log('Running "chcp" to detect terminal encoding...');
|
||||
}
|
||||
|
||||
exec('chcp', (err, stdout, stderr) => {
|
||||
if (stdout) {
|
||||
const windowsTerminalEncodingKeys = Object.keys(windowsTerminalEncodings) as Array<keyof typeof windowsTerminalEncodings>;
|
||||
for (const key of windowsTerminalEncodingKeys) {
|
||||
if (stdout.indexOf(key) >= 0) {
|
||||
return resolve(windowsTerminalEncodings[key]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return resolve(undefined);
|
||||
});
|
||||
});
|
||||
}
|
||||
// Linux/Mac: use "locale charmap" command
|
||||
else {
|
||||
rawEncodingPromise = new Promise<string>(resolve => {
|
||||
if (verbose) {
|
||||
console.log('Running "locale charmap" to detect terminal encoding...');
|
||||
}
|
||||
|
||||
exec('locale charmap', (err, stdout, stderr) => resolve(stdout));
|
||||
});
|
||||
}
|
||||
|
||||
const rawEncoding = await rawEncodingPromise;
|
||||
if (verbose) {
|
||||
console.log(`Detected raw terminal encoding: ${rawEncoding}`);
|
||||
}
|
||||
|
||||
if (!rawEncoding || rawEncoding.toLowerCase() === 'utf-8' || rawEncoding.toLowerCase() === UTF8) {
|
||||
return UTF8;
|
||||
}
|
||||
|
||||
return toIconvLiteEncoding(rawEncoding);
|
||||
}
|
||||
Reference in New Issue
Block a user