Files
azuredatastudio/src/vs/base/node/mime.ts
Karl Burtram 251ae01c3e Initial VS Code 1.19 source merge (#571)
* Initial 1.19 xcopy

* Fix yarn build

* Fix numerous build breaks

* Next batch of build break fixes

* More build break fixes

* Runtime breaks

* Additional post merge fixes

* Fix windows setup file

* Fix test failures.

* Update license header blocks to refer to source eula
2018-01-28 23:37:17 -08:00

106 lines
3.9 KiB
TypeScript

/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the Source EULA. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
'use strict';
import mime = require('vs/base/common/mime');
import { TPromise } from 'vs/base/common/winjs.base';
import stream = require('vs/base/node/stream');
import encoding = require('vs/base/node/encoding');
/**
* Lots of binary file types exist where the type can be determined by matching the first few bytes against some "magic patterns".
* E.g. PDF files always start with %PDF- and the rest of the file contains mostly text, but sometimes binary data (for fonts and images).
* In order to detect these types correctly (and independently from the file's extension), the content-based mime type detection must be performed
* on any file, not only on text files.
*
* Here is the original mime type detection in pseudocode:
*
* let mimes = [];
*
* read file extension
*
* if (file extension matches) {
* if (file extension is bogus) {
* // ignore.
* // this covers *.manifest files which can contain arbitrary content, so the extension is of no value.
* // a consequence of this is that the content based mime type becomes the most specific type in the array
* } else {
* mimes.push(associated mime type) // first element: most specific
* }
* }
*
* read file contents
*
* if (content based match found) { // this is independent from text or binary
* mimes.push(associated mime type)
* if (a second mime exists for the match) { // should be rare; text/plain should never be included here
* // e.g. for svg: ['image/svg+xml', 'application/xml']
* mimes.push(second mime)
* }
* }
*
* if (content == text)
* mimes.push('text/plain') // last element: least specific
* else
* mimes.push('application/octet-stream') // last element: least specific
*/
/** Number of leading bytes to look at when deciding whether a file is binary or not. */
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512;

/** Buffer size when the encoding is not auto guessed: a small number of bytes is enough. */
const NO_GUESS_BUFFER_MAX_LEN = 512;

/** Buffer size when the encoding is auto guessed: a lot more content should be read for guessing. */
const AUTO_GUESS_BUFFER_MAX_LEN = 8 * 512;
/**
 * Returns the maximum number of bytes to read from a file for mime and encoding detection.
 *
 * @param arg1 Either the auto-guess flag itself, or an options object carrying `autoGuessEncoding`.
 * When omitted, encoding auto guessing is treated as disabled.
 * @returns `AUTO_GUESS_BUFFER_MAX_LEN` when auto guessing is enabled, `NO_GUESS_BUFFER_MAX_LEN` otherwise.
 */
export function maxBufferLen(arg1?: DetectMimesOption | boolean): number {
	// A bare boolean is the flag itself; anything else is treated as an (optional) options object
	const wantsAutoGuess = typeof arg1 === 'boolean' ? arg1 : Boolean(arg1 && arg1.autoGuessEncoding);

	if (wantsAutoGuess) {
		return AUTO_GUESS_BUFFER_MAX_LEN;
	}

	return NO_GUESS_BUFFER_MAX_LEN;
}
/**
 * Result of inspecting the leading bytes of a file.
 */
export interface IMimeAndEncoding {
	/** Encoding detected for the content; may be falsy when no encoding could be determined. */
	encoding: string;

	/** Mime types detected for the content. */
	mimes: string[];
}
/**
 * Options that influence mime and encoding detection.
 */
export interface DetectMimesOption {
	/** When truthy, a larger buffer is read so that the encoding can be guessed from the content. */
	autoGuessEncoding?: boolean;
}
/**
 * Classifies the leading bytes of a file as text or binary and determines their encoding.
 *
 * The encoding is first looked up via `encoding.detectEncodingByBOMFromBuffer`. Unless that yields
 * UTF-16 (BE or LE), up to `ZERO_BYTE_DETECTION_BUFFER_MAX_LEN` of the bytes read are scanned and
 * the first zero byte found marks the content as binary.
 *
 * NOTE(review): the second overload advertises a promise, but a plain result object is returned
 * whenever no guessing takes place (binary content, a detected BOM, or a falsy flag) - callers
 * should be prepared for either shape.
 *
 * @param readResult The buffer to inspect together with the number of bytes that were read into it.
 * @param autoGuessEncoding When truthy and the content looks like text without a detected BOM encoding,
 * the encoding is guessed from the bytes read via `encoding.guessEncodingByBuffer`.
 * @returns A promise of the result when the encoding is guessed, otherwise the result itself.
 */
export function detectMimeAndEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: false): IMimeAndEncoding;
export function detectMimeAndEncodingFromBuffer(readResult: stream.ReadResult, autoGuessEncoding?: boolean): TPromise<IMimeAndEncoding>;
export function detectMimeAndEncodingFromBuffer({ buffer, bytesRead }: stream.ReadResult, autoGuessEncoding?: boolean): TPromise<IMimeAndEncoding> | IMimeAndEncoding {
	const bomEncoding = encoding.detectEncodingByBOMFromBuffer(buffer, bytesRead);
	const isUtf16 = bomEncoding === encoding.UTF16be || bomEncoding === encoding.UTF16le;

	// UTF 16 content is exempt from the zero byte check; everything else is binary as soon as a zero byte shows up
	let seemsText = true;
	if (!isUtf16) {
		const scanLimit = Math.min(bytesRead, ZERO_BYTE_DETECTION_BUFFER_MAX_LEN);
		let offset = 0;
		while (seemsText && offset < scanLimit) {
			seemsText = buffer.readInt8(offset) !== 0;
			offset++;
		}
	}

	// Single place that turns the text/binary verdict plus an encoding into the result shape
	const toResult = (detected: string): IMimeAndEncoding => ({
		mimes: seemsText ? [mime.MIME_TEXT] : [mime.MIME_BINARY],
		encoding: detected
	});

	// Guessing is asynchronous and only attempted for text content without a detected BOM encoding
	if (autoGuessEncoding && seemsText && !bomEncoding) {
		return encoding.guessEncodingByBuffer(buffer.slice(0, bytesRead)).then(guessed => toResult(guessed));
	}

	return toResult(bomEncoding);
}