diff --git a/src/sql/workbench/contrib/notebook/browser/htmlMarkdownConverter.ts b/src/sql/workbench/contrib/notebook/browser/htmlMarkdownConverter.ts index ba8cb7c1fb..cc7d7b5ba4 100644 --- a/src/sql/workbench/contrib/notebook/browser/htmlMarkdownConverter.ts +++ b/src/sql/workbench/contrib/notebook/browser/htmlMarkdownConverter.ts @@ -8,6 +8,28 @@ import { URI } from 'vs/base/common/uri'; import * as path from 'vs/base/common/path'; import * as turndownPluginGfm from 'sql/workbench/contrib/notebook/browser/turndownPluginGfm'; +// These replacements apply only to text. Here's how it's handled from Turndown: +// if (node.nodeType === 3) { +// replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue); +// } +const markdownReplacements = [ + [/\\/g, '\\\\'], + [/\*/g, '\\*'], + [/^-/g, '\\-'], + [/^\+ /g, '\\+ '], + [/^(=+)/g, '\\$1'], + [/^(#{1,6}) /g, '\\$1 '], + [/`/g, '\\`'], + [/^~~~/g, '\\~~~'], + [/\[/g, '\\['], + [/\]/g, '\\]'], + [/^>/g, '\\>'], + [/_/g, '\\_'], + [/^(\d+)\. /g, '$1\\. '], + [/ is escaped + [/>/g, '\\>'], // Added to ensure sample text like is escaped +]; + export class HTMLMarkdownConverter { private turndownService: TurndownService; @@ -21,7 +43,7 @@ export class HTMLMarkdownConverter { } private setTurndownOptions() { - this.turndownService.keep(['u', 'mark', 'style']); + this.turndownService.keep(['style']); this.turndownService.use(turndownPluginGfm.gfm); this.turndownService.addRule('pre', { filter: 'pre', @@ -29,6 +51,22 @@ export class HTMLMarkdownConverter { return '\n```\n' + node.textContent + '\n```\n'; } }); + this.turndownService.addRule('mark', { + filter: 'mark', + replacement: (content, node) => { + return '' + content + ''; + } + }); + this.turndownService.addRule('underline', { + filter: ['u'], + replacement: (content, node, options) => { + if (!content.trim()) { + return ''; + } + content = addHighlightIfYellowBgExists(node, content); + return '' + content + ''; + } + }); this.turndownService.addRule('caption', { filter: 'caption', replacement: function (content, node) { @@ -39,7 +77,6 @@ export class HTMLMarkdownConverter { this.turndownService.addRule('span', { filter: 'span', replacement: function (content, node) { - let escapedText = escapeAngleBrackets(node.textContent); // There are certain properties that either don't have equivalents in markdown or whose transformations // don't have actions defined in WYSIWYG yet. To unblock users, leaving these elements alone (including their child elements) // Note: the initial list was generated from our TSG Jupyter Book @@ -75,7 +112,7 @@ export class HTMLMarkdownConverter { beginString = '' + beginString; endString += ''; } - return beginString + escapedText + endString; + return beginString + content + endString; } }); this.turndownService.addRule('img', { @@ -100,8 +137,6 @@ export class HTMLMarkdownConverter { const notebookLink = node.href ? URI.parse(node.href) : URI.file(node.title); const notebookFolder = this.notebookUri ? path.join(path.dirname(this.notebookUri.fsPath), path.sep) : ''; let relativePath = findPathRelativeToContent(notebookFolder, notebookLink); - node.innerText = escapeAngleBrackets(node.innerText); - content = escapeAngleBrackets(content); if (relativePath) { return `[${node.innerText}](${relativePath})`; } @@ -115,7 +150,6 @@ export class HTMLMarkdownConverter { .replace(/^\n+/, '') // remove leading newlines .replace(/\n+$/, '\n') // replace trailing newlines with just a single one .replace(/\n/gm, '\n '); // indent - content = escapeAngleBrackets(content); let prefix = options.bulletListMarker + ' '; let parent = node.parentNode; let nestedCount = 0; @@ -135,44 +169,22 @@ export class HTMLMarkdownConverter { ); } }); - this.turndownService.addRule('p', { - filter: 'p', - replacement: function (content, node) { - let isAnchorElement: boolean = false; - node.childNodes.forEach(c => { - if (c.nodeType === Node.TEXT_NODE) { - c.nodeValue = escapeAngleBrackets(c.textContent); - } else if (c.nodeType === Node.ELEMENT_NODE) { - c.innerText = escapeAngleBrackets(c.textContent); - if (c.nodeName === 'A') { - isAnchorElement = true; - } - } - }); - if (isAnchorElement) { - return content; - } else { - return '\n\n' + node.innerHTML.replace(/</gi, '<').replace(/>/gi, '>').replace(/ /gi, '') + '\n\n'; - } - } - }); this.turndownService.addRule('heading', { filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], replacement: function (content, node, options) { let hLevel = Number(node.nodeName.charAt(1)); - let escapedText = escapeAngleBrackets(content); if (options.headingStyle === 'setext' && hLevel < 3) { let underline = '#'.repeat(hLevel); - return '\n\n' + escapedText + '\n' + underline + '\n\n'; + return '\n\n' + content + '\n' + underline + '\n\n'; } else { - return '\n\n' + '#'.repeat(hLevel) + ' ' + escapedText + '\n\n'; + return '\n\n' + '#'.repeat(hLevel) + ' ' + content + '\n\n'; } } }); this.turndownService.addRule('bold', { filter: ['strong', 'b'], replacement: function (content, node, options) { - content = escapeAngleBrackets(content); + content = addHighlightIfYellowBgExists(node, content); if (!content.trim()) { return ''; } return options.strongDelimiter + content + options.strongDelimiter; } @@ -180,7 +192,7 @@ export class HTMLMarkdownConverter { this.turndownService.addRule('italicize', { filter: ['em', 'i'], replacement: function (content, node, options) { - content = escapeAngleBrackets(content); + content = addHighlightIfYellowBgExists(node, content); if (!content.trim()) { return ''; } return options.emDelimiter + content + options.emDelimiter; } @@ -192,8 +204,7 @@ export class HTMLMarkdownConverter { return node.nodeName === 'CODE' && !isCodeBlock; }, - replacement: function (content) { - content = escapeAngleBrackets(content); + replacement: function (content, node, options) { if (!content.trim()) { return ''; } let delimiter = '`'; @@ -209,9 +220,17 @@ export class HTMLMarkdownConverter { return delimiter + leadingSpace + content + trailingSpace + delimiter; } }); + this.turndownService.escape = escapeMarkdown; } } +function escapeMarkdown(text) { + return markdownReplacements.reduce( + (search, replacement) => search.replace(replacement[0], replacement[1]), + text, + ); +} + export function findPathRelativeToContent(notebookFolder: string, contentPath: URI | undefined): string { if (notebookFolder) { if (contentPath?.scheme === 'file') { @@ -229,15 +248,9 @@ export function findPathRelativeToContent(notebookFolder: string, contentPath: U return ''; } -export function escapeAngleBrackets(textContent: string): string { - let text: string = textContent; - if (text.includes('') || text.includes('') || (text.includes('style') && !text.includes('