WYSIWYG Improvements to highlight (#13032)

* Improvements to highlight

* wip

* Tests pass

* Leverage escaping mechanism

* Tweak highlight logic

* PR comments
This commit is contained in:
Chris LaFreniere
2020-12-02 15:51:40 -08:00
committed by GitHub
parent cb567989da
commit d86e1eec10
3 changed files with 149 additions and 51 deletions

View File

@@ -8,6 +8,28 @@ import { URI } from 'vs/base/common/uri';
import * as path from 'vs/base/common/path';
import * as turndownPluginGfm from 'sql/workbench/contrib/notebook/browser/turndownPluginGfm';
// These replacements apply only to text. Here's how it's handled from Turndown:
// if (node.nodeType === 3) {
// replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
// }
// Each entry is a [pattern, substitution] pair and the pairs are applied in array order.
// The backslash rule has to stay first: every later rule inserts backslashes of its own,
// and those must not be escaped a second time.
const markdownReplacements: [RegExp, string][] = [
	[/\\/g, '\\\\'],
	[/\*/g, '\\*'],
	[/^-/g, '\\-'],
	[/^\+ /g, '\\+ '],
	[/^(=+)/g, '\\$1'],
	[/^(#{1,6}) /g, '\\$1 '],
	[/`/g, '\\`'],
	[/^~~~/g, '\\~~~'],
	[/\[/g, '\\['],
	[/\]/g, '\\]'],
	// Intentionally no /^>/ rule here: the global '>' rule at the end of the table already
	// escapes a leading '>'. Running both turned text starting with '>' into '\\>', which
	// renders as a literal backslash followed by '>'.
	[/_/g, '\\_'],
	[/^(\d+)\. /g, '$1\\. '],
	[/</g, '\\<'], // Added to ensure sample text like <hello> is escaped
	[/>/g, '\\>'], // Added to ensure sample text like <hello> is escaped (also covers a leading '>')
];
export class HTMLMarkdownConverter {
private turndownService: TurndownService;
@@ -21,7 +43,7 @@ export class HTMLMarkdownConverter {
}
private setTurndownOptions() {
this.turndownService.keep(['u', 'mark', 'style']);
this.turndownService.keep(['style']);
this.turndownService.use(turndownPluginGfm.gfm);
this.turndownService.addRule('pre', {
filter: 'pre',
@@ -29,6 +51,22 @@ export class HTMLMarkdownConverter {
return '\n```\n' + node.textContent + '\n```\n';
}
});
// Re-emit <mark> elements as literal <mark> tags around their already-converted content.
this.turndownService.addRule('mark', {
	filter: 'mark',
	replacement: (content) => `<mark>${content}</mark>`
});
// Underline rule: whitespace-only content produces an empty string; otherwise the content is
// passed through addHighlightIfYellowBgExists and then wrapped in literal <u> tags.
this.turndownService.addRule('underline', {
	filter: ['u'],
	replacement: (content, node, options) => {
		const isBlank = content.trim().length === 0;
		if (isBlank) {
			return '';
		}
		const inner = addHighlightIfYellowBgExists(node, content);
		return `<u>${inner}</u>`;
	}
});
this.turndownService.addRule('caption', {
filter: 'caption',
replacement: function (content, node) {
@@ -39,7 +77,6 @@ export class HTMLMarkdownConverter {
this.turndownService.addRule('span', {
filter: 'span',
replacement: function (content, node) {
let escapedText = escapeAngleBrackets(node.textContent);
// There are certain properties that either don't have equivalents in markdown or whose transformations
// don't have actions defined in WYSIWYG yet. To unblock users, leaving these elements alone (including their child elements)
// Note: the initial list was generated from our TSG Jupyter Book
@@ -75,7 +112,7 @@ export class HTMLMarkdownConverter {
beginString = '<u>' + beginString;
endString += '</u>';
}
return beginString + escapedText + endString;
return beginString + content + endString;
}
});
this.turndownService.addRule('img', {
@@ -100,8 +137,6 @@ export class HTMLMarkdownConverter {
const notebookLink = node.href ? URI.parse(node.href) : URI.file(node.title);
const notebookFolder = this.notebookUri ? path.join(path.dirname(this.notebookUri.fsPath), path.sep) : '';
let relativePath = findPathRelativeToContent(notebookFolder, notebookLink);
node.innerText = escapeAngleBrackets(node.innerText);
content = escapeAngleBrackets(content);
if (relativePath) {
return `[${node.innerText}](${relativePath})`;
}
@@ -115,7 +150,6 @@ export class HTMLMarkdownConverter {
.replace(/^\n+/, '') // remove leading newlines
.replace(/\n+$/, '\n') // replace trailing newlines with just a single one
.replace(/\n/gm, '\n '); // indent
content = escapeAngleBrackets(content);
let prefix = options.bulletListMarker + ' ';
let parent = node.parentNode;
let nestedCount = 0;
@@ -135,44 +169,22 @@ export class HTMLMarkdownConverter {
);
}
});
this.turndownService.addRule('p', {
filter: 'p',
replacement: function (content, node) {
let isAnchorElement: boolean = false;
node.childNodes.forEach(c => {
if (c.nodeType === Node.TEXT_NODE) {
c.nodeValue = escapeAngleBrackets(c.textContent);
} else if (c.nodeType === Node.ELEMENT_NODE) {
c.innerText = escapeAngleBrackets(c.textContent);
if (c.nodeName === 'A') {
isAnchorElement = true;
}
}
});
if (isAnchorElement) {
return content;
} else {
return '\n\n' + node.innerHTML.replace(/&lt;/gi, '<').replace(/&gt;/gi, '>').replace(/&nbsp;/gi, '') + '\n\n';
}
}
});
this.turndownService.addRule('heading', {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
let hLevel = Number(node.nodeName.charAt(1));
let escapedText = escapeAngleBrackets(content);
if (options.headingStyle === 'setext' && hLevel < 3) {
let underline = '#'.repeat(hLevel);
return '\n\n' + escapedText + '\n' + underline + '\n\n';
return '\n\n' + content + '\n' + underline + '\n\n';
} else {
return '\n\n' + '#'.repeat(hLevel) + ' ' + escapedText + '\n\n';
return '\n\n' + '#'.repeat(hLevel) + ' ' + content + '\n\n';
}
}
});
this.turndownService.addRule('bold', {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
content = escapeAngleBrackets(content);
content = addHighlightIfYellowBgExists(node, content);
if (!content.trim()) { return ''; }
return options.strongDelimiter + content + options.strongDelimiter;
}
@@ -180,7 +192,7 @@ export class HTMLMarkdownConverter {
this.turndownService.addRule('italicize', {
filter: ['em', 'i'],
replacement: function (content, node, options) {
content = escapeAngleBrackets(content);
content = addHighlightIfYellowBgExists(node, content);
if (!content.trim()) { return ''; }
return options.emDelimiter + content + options.emDelimiter;
}
@@ -192,8 +204,7 @@ export class HTMLMarkdownConverter {
return node.nodeName === 'CODE' && !isCodeBlock;
},
replacement: function (content) {
content = escapeAngleBrackets(content);
replacement: function (content, node, options) {
if (!content.trim()) { return ''; }
let delimiter = '`';
@@ -209,9 +220,17 @@ export class HTMLMarkdownConverter {
return delimiter + leadingSpace + content + trailingSpace + delimiter;
}
});
this.turndownService.escape = escapeMarkdown;
}
}
/**
 * Applies every [pattern, substitution] pair in markdownReplacements to the given text,
 * in array order, and returns the result.
 */
function escapeMarkdown(text) {
	let escaped = text;
	for (const rule of markdownReplacements) {
		escaped = escaped.replace(rule[0], rule[1]);
	}
	return escaped;
}
export function findPathRelativeToContent(notebookFolder: string, contentPath: URI | undefined): string {
if (notebookFolder) {
if (contentPath?.scheme === 'file') {
@@ -229,15 +248,9 @@ export function findPathRelativeToContent(notebookFolder: string, contentPath: U
return '';
}
export function escapeAngleBrackets(textContent: string): string {
let text: string = textContent;
if (text.includes('<u>') || text.includes('<mark>') || (text.includes('style') && !text.includes('<style>'))) {
return text;
export function addHighlightIfYellowBgExists(node, content: string): string {
if (node?.style?.backgroundColor === 'yellow') {
return '<mark>' + content + '</mark>';
}
let mapTags = { '<': '\\<', '>': '\\>' };
let escapedText = text.replace(/<|>/gi, function (matched) {
return mapTags[matched];
});
return escapedText;
return content;
}

View File

@@ -65,7 +65,44 @@ export class TransformMarkdownAction extends Action {
document.execCommand('formatBlock', false, 'H3');
break;
case MarkdownButtonType.HIGHLIGHT:
document.execCommand('hiliteColor', false, 'Yellow');
// Highlight acts as a toggle: if the selection's focus node already sits inside a highlight
// (a <mark> element or a <span> with a yellow background) that highlight is removed,
// otherwise a yellow highlight is applied.
let selectionFocusNode = document.getSelection()?.focusNode;
// Find if element is wrapped in <mark></mark>: climb until the parent is a <mark> or there is no parent left
while (selectionFocusNode?.parentNode?.nodeName?.toLowerCase() && selectionFocusNode?.parentNode?.nodeName?.toLowerCase() !== 'mark') {
	selectionFocusNode = selectionFocusNode.parentNode;
}
// Find if element is wrapped in <span background-color="yellow">
if (selectionFocusNode?.parentNode?.nodeName?.toLowerCase() !== 'mark') {
	selectionFocusNode = document.getSelection()?.focusNode;
	// Keep climbing until the parent is BOTH a span AND yellow. Stopping on "span OR yellow"
	// halts at the first span of any color (or the first yellow non-span element), so a yellow
	// span further up the tree was never found and could not be toggled off.
	while (selectionFocusNode?.parentNode?.nodeName?.toLowerCase()
		&& !(selectionFocusNode?.parentNode?.nodeName?.toLowerCase() === 'span' && selectionFocusNode?.parentElement?.style?.backgroundColor === 'yellow')) {
		selectionFocusNode = selectionFocusNode.parentNode;
	}
}
let nodeName = selectionFocusNode?.parentNode?.nodeName?.toLowerCase();
let backgroundColor = selectionFocusNode?.parentElement?.style?.backgroundColor;
if (nodeName === 'mark') {
	let oldParent = selectionFocusNode.parentNode;
	let newParent = selectionFocusNode.parentNode.parentNode;
	let oldParentNextSibling = oldParent.nextSibling;
	// Move every child of the <mark> element up into its parent, preserving order.
	// insertBefore with a null reference node appends, which covers the case where the
	// <mark> was the last child. The emptied <mark> element itself is left in place.
	while (oldParent.childNodes.length > 0) {
		newParent.insertBefore(oldParent.firstChild, oldParentNextSibling);
	}
	// Empty span required to force an input so that HTML change is seen from text cell component
	// This span doesn't have any effect on the markdown generated.
	document.execCommand('formatBlock', false, 'span');
} else if (nodeName === 'span' && backgroundColor === 'yellow') {
	// Clear the highlight by resetting the span's inline background color.
	selectionFocusNode.parentElement.style.backgroundColor = '';
	// Empty span required to force an input so that HTML change is seen from text cell component
	// This span doesn't have any effect on the markdown generated.
	document.execCommand('formatBlock', false, 'span');
} else {
	// Not inside an existing highlight: apply one.
	document.execCommand('hiliteColor', false, 'Yellow');
}
break;
case MarkdownButtonType.IMAGE:
// TODO

View File

@@ -79,6 +79,10 @@ suite('HTML Markdown Converter', function (): void {
assert.equal(htmlMarkdownConverter.convert(htmlString), 'Yes<u>Hello test</u>', 'Basic underline span no space failed');
htmlString = '<h1>Yes<span style="text-decoration-line:underline; font-style:italic; font-weight:bold; background-color: yellow">Hello test</span></h1>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '# Yes<u>_**<mark>Hello test</mark>**_</u>', 'Compound elements span failed');
htmlString = '<span style="background-color: yellow;"><b>Hello test</b></span>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '<mark>**Hello test**</mark>', 'Span with inner html not parsed correctly');
htmlString = '<b><span style="background-color: yellow;">Hello test</span></b>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '**<mark>Hello test</mark>**', 'Span inside bold tag parsed correctly');
htmlString = '<span style="color: orangered">Hello test</span>';
assert.equal(htmlMarkdownConverter.convert(htmlString), htmlString, 'Span with color style should not be altered');
htmlString = '<span style="font-size: 10.0pt">Hello test</span>';
@@ -158,9 +162,9 @@ suite('HTML Markdown Converter', function (): void {
test('Should keep < > tag', () => {
htmlString = '&lt;test&gt';
assert.equal(htmlMarkdownConverter.convert(htmlString), '<test>', 'Non-HTML tag test failed to escape');
assert.equal(htmlMarkdownConverter.convert(htmlString), '\\<test\\>', 'Non-HTML tag test failed to escape');
htmlString = '&lt;test&gt<span style="background:red">message</span>&lt;test&gt';
assert.equal(htmlMarkdownConverter.convert(htmlString), '<test><span style="background:red">message</span><test>', 'Non-HTML tag inside span tag test failed to escape');
assert.equal(htmlMarkdownConverter.convert(htmlString), '\\<test\\><span style="background:red">message</span>\\<test\\>', 'Non-HTML tag inside span tag test failed to escape');
htmlString = '<h1>&lt;test&gt;<h1>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '# \\<test\\>', 'Non-HTML tag inside H1 tag test failed to escape');
htmlString = '<h2>&lt;test&gt;<h2>';
@@ -174,19 +178,19 @@ suite('HTML Markdown Converter', function (): void {
htmlString = '<em>&lt;Italicize test&gt;</em>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '_\\<Italicize test\\>_', 'Basic italicize non-HTML tag test failed to escape');
htmlString = '<u>&lt;Underline_test&gt;</u> ';
assert.equal(htmlMarkdownConverter.convert(htmlString), '<u>&lt;Underline_test&gt;</u>', 'Basic underline non-HTML tag test failed to escape');
assert.equal(htmlMarkdownConverter.convert(htmlString), '<u>\\<Underline\\_test\\></u>', 'Basic underline non-HTML tag test failed to escape');
htmlString = '<ul><li>&lt;test&gt;</li></ul>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '- \\<test\\>', 'Basic unordered list non-HTML tag item test failed to escape');
htmlString = '<ol><li>&lt;test&gt;</li></ol>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '1. \\<test\\>', 'Basic ordered list non-HTML tag item test failed to escape');
htmlString = '<mark>&lt;test&gt;</mark>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '<mark>&lt;test&gt;</mark>', 'Basic highlighting Non-HTML tag test failed to escape');
assert.equal(htmlMarkdownConverter.convert(htmlString), '<mark>\\<test\\></mark>', 'Basic highlighting Non-HTML tag test failed to escape');
htmlString = '<mark><h1>&lt;test&gt;</h1></mark>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '<mark><h1>&lt;test&gt;</h1></mark>', 'Non-HTML tag inside multiple html tags test failed to escape');
assert.equal(htmlMarkdownConverter.convert(htmlString), '<mark>\n\n# \\<test\\>\n\n</mark>', 'Non-HTML tag inside multiple html tags test failed to escape');
htmlString = '<p>&lt;style&gt</p>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '\\<style\\>', 'Style tag as a non-HTML tag test failed to escape');
htmlString = '&lt;test&gt <u>Underlined Text style</u> end';
assert.equal(htmlMarkdownConverter.convert(htmlString), '<test> <u>Underlined Text style</u> end', 'Non-HTML tag outside with style and underline test failed to escape');
assert.equal(htmlMarkdownConverter.convert(htmlString), '\\<test\\> <u>Underlined Text style</u> end', 'Non-HTML tag outside with style and underline test failed to escape');
});
@@ -204,4 +208,48 @@ suite('HTML Markdown Converter', function (): void {
htmlString = '<table>\n<thead>\n<tr>\n<th>Test</th>\n<th>Test</th>\n<th>Test</th>\n</tr>\n</thead>\n<tbody><tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n<tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n<tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n<tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n</tbody></table>\n';
assert.equal(htmlMarkdownConverter.convert(htmlString), `| Test | Test | Test |\n| --- | --- | --- |\n| test | test | test |\n| test | test | test |\n| test | test | test |\n| test | test | test |`, 'Table with header failed');
});
test('Should transform <b> and <strong> tags', () => {
	// Each case is [input HTML, expected markdown, assertion message].
	const cases: [string, string, string][] = [
		['<b>test string</b>', '**test string**', 'Basic bold test failed'],
		['<b style="background-color: yellow">test string</b>', '**<mark>test string</mark>**', 'Highlight bold test failed'],
		['<b style="background-color: yellow"><i>test string</i></b>', '**<mark>_test string_</mark>**', 'Highlight bold italic test failed'],
		['<b style="blah: nothing">test string</b>', '**test string**', 'Incorrect style bold test failed'],
		['<strong>test string</strong>', '**test string**', 'Basic strong test failed'],
		['<strong style="background-color: yellow">test string</strong>', '**<mark>test string</mark>**', 'Highlight strong test failed'],
		['<strong style="blah: nothing">test string</strong>', '**test string**', 'Incorrect style strong test failed'],
	];
	for (const [html, expected, message] of cases) {
		htmlString = html;
		assert.equal(htmlMarkdownConverter.convert(htmlString), expected, message);
	}
});
test('Should transform <i> and <em> tags', () => {
	// Each case is [input HTML, expected markdown, assertion message].
	// Every message is unique so a failing assertion identifies its case unambiguously.
	const cases: [string, string, string][] = [
		['<i>test string</i>', '_test string_', 'Basic italic test failed'],
		['<p><i>test string</i></p>', '_test string_', 'Italic inside paragraph test failed'],
		['<i style="background-color: yellow">test string</i>', '_<mark>test string</mark>_', 'Highlight italic test failed'],
		['<i style="background-color: yellow"><b>test string</b></i>', '_<mark>**test string**</mark>_', 'Highlight italic bold test failed'],
		['<i style="blah: nothing">test string</i>', '_test string_', 'Incorrect style italic test failed'],
		['<em>test string</em>', '_test string_', 'Basic em test failed'],
		['<em style="background-color: yellow">test string</em>', '_<mark>test string</mark>_', 'Highlight em test failed'],
		['<em style="blah: nothing">test string</em>', '_test string_', 'Incorrect style em test failed'],
		['<em style="background-color: yellow"><b>test string</b></em>', '_<mark>**test string**</mark>_', 'Highlight em bold test failed'],
	];
	for (const [html, expected, message] of cases) {
		htmlString = html;
		assert.equal(htmlMarkdownConverter.convert(htmlString), expected, message);
	}
});
test('Should transform <u> when necessary', () => {
	// Each case is [input HTML, expected markdown, assertion message].
	// A plain <u> element is expected to round-trip unchanged.
	const cases: [string, string, string][] = [
		['<u>test string</u>', '<u>test string</u>', 'Basic underline test failed'],
		['<u style="background-color: yellow">test string</u>', '<u><mark>test string</mark></u>', 'Highlight underline test failed'],
		['<b><u style="background-color: yellow">test string</u></b>', '**<u><mark>test string</mark></u>**', 'Underline as inner element failed'],
	];
	for (const [html, expected, message] of cases) {
		htmlString = html;
		assert.equal(htmlMarkdownConverter.convert(htmlString), expected, message);
	}
});
});