SQL Operations Studio Public Preview 1 (0.23) release source code

2026-02-06 17:23:53 -05:00 · 2017-11-09 14:30:27 -08:00
parent b88ecb8d93
commit 3cdac41339
8829 changed files with 759707 additions and 286 deletions
--- a/src/vs/base/node/encoding.ts
+++ b/src/vs/base/node/encoding.ts
@@ -0,0 +1,171 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the Source EULA. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+'use strict';
+
+import stream = require('vs/base/node/stream');
+import iconv = require('iconv-lite');
+import { TPromise } from 'vs/base/common/winjs.base';
+
+export const UTF8 = 'utf8';
+export const UTF8_with_bom = 'utf8bom';
+export const UTF16be = 'utf16be';
+export const UTF16le = 'utf16le';
+
+export function bomLength(encoding: string): number {
+	switch (encoding) {
+		case UTF8:
+			return 3;
+		case UTF16be:
+		case UTF16le:
+			return 2;
+	}
+
+	return 0;
+}
+
+export function decode(buffer: NodeBuffer, encoding: string, options?: any): string {
+	return iconv.decode(buffer, toNodeEncoding(encoding), options);
+}
+
+export function encode(content: string, encoding: string, options?: any): NodeBuffer {
+	return iconv.encode(content, toNodeEncoding(encoding), options);
+}
+
+export function encodingExists(encoding: string): boolean {
+	return iconv.encodingExists(toNodeEncoding(encoding));
+}
+
+export function decodeStream(encoding: string): NodeJS.ReadWriteStream {
+	return iconv.decodeStream(toNodeEncoding(encoding));
+}
+
+export function encodeStream(encoding: string): NodeJS.ReadWriteStream {
+	return iconv.encodeStream(toNodeEncoding(encoding));
+}
+
+function toNodeEncoding(enc: string): string {
+	if (enc === UTF8_with_bom) {
+		return UTF8; // iconv does not distinguish UTF 8 with or without BOM, so we need to help it
+	}
+
+	return enc;
+}
+
+export function detectEncodingByBOMFromBuffer(buffer: NodeBuffer, bytesRead: number): string {
+	if (!buffer || bytesRead < 2) {
+		return null;
+	}
+
+	const b0 = buffer.readUInt8(0);
+	const b1 = buffer.readUInt8(1);
+
+	// UTF-16 BE
+	if (b0 === 0xFE && b1 === 0xFF) {
+		return UTF16be;
+	}
+
+	// UTF-16 LE
+	if (b0 === 0xFF && b1 === 0xFE) {
+		return UTF16le;
+	}
+
+	if (bytesRead < 3) {
+		return null;
+	}
+
+	const b2 = buffer.readUInt8(2);
+
+	// UTF-8
+	if (b0 === 0xEF && b1 === 0xBB && b2 === 0xBF) {
+		return UTF8;
+	}
+
+	return null;
+}
+
+/**
+ * Detects the Byte Order Mark in a given file.
+ * If no BOM is detected, null will be passed to callback.
+ */
+export function detectEncodingByBOM(file: string): TPromise<string> {
+	return stream.readExactlyByFile(file, 3).then(({ buffer, bytesRead }) => detectEncodingByBOMFromBuffer(buffer, bytesRead));
+}
+
+const MINIMUM_THRESHOLD = 0.2;
+
+const IGNORE_ENCODINGS = ['ascii', 'utf-8', 'utf-16', 'utf-32'];
+const MAPPED_ENCODINGS = {
+	'ibm866': 'cp866'
+};
+
+/**
+ * Guesses the encoding from buffer.
+ */
+export async function guessEncodingByBuffer(buffer: NodeBuffer): TPromise<string> {
+
+	const jschardet = await import('jschardet');
+
+	jschardet.Constants.MINIMUM_THRESHOLD = MINIMUM_THRESHOLD;
+
+	const guessed = jschardet.detect(buffer);
+	if (!guessed || !guessed.encoding) {
+		return null;
+	}
+
+	const enc = guessed.encoding.toLowerCase();
+
+	// Ignore encodings that cannot guess correctly
+	// (http://chardet.readthedocs.io/en/latest/supported-encodings.html)
+	if (0 <= IGNORE_ENCODINGS.indexOf(enc)) {
+		return null;
+	}
+
+	return toIconvLiteEncoding(guessed.encoding);
+}
+
+function toIconvLiteEncoding(encodingName: string): string {
+	const normalizedEncodingName = encodingName.replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
+	const mapped = MAPPED_ENCODINGS[normalizedEncodingName];
+
+	return mapped || normalizedEncodingName;
+}
+
+/**
+ * The encodings that are allowed in a settings file don't match the canonical encoding labels specified by WHATWG.
+ * See https://encoding.spec.whatwg.org/#names-and-labels
+ * Iconv-lite strips all non-alphanumeric characters, but ripgrep doesn't. For backcompat, allow these labels.
+ */
+export function toCanonicalName(enc: string): string {
+	switch (enc) {
+		case 'shiftjis':
+			return 'shift-jis';
+		case 'utf16le':
+			return 'utf-16le';
+		case 'utf16be':
+			return 'utf-16be';
+		case 'big5hkscs':
+			return 'big5-hkscs';
+		case 'eucjp':
+			return 'euc-jp';
+		case 'euckr':
+			return 'euc-kr';
+		case 'koi8r':
+			return 'koi8-r';
+		case 'koi8u':
+			return 'koi8-u';
+		case 'macroman':
+			return 'x-mac-roman';
+		case 'utf8bom':
+			return 'utf8';
+		default:
+			const m = enc.match(/windows(\d+)/);
+			if (m) {
+				return 'windows-' + m[1];
+			}
+
+			return enc;
+	}
+}