Fix for < > (non HTML) tags disappearing in WYSIWYG (#13267)

* Push the latest update for WYSIWYG bug

* Improvements to nested lists

* OL tests and PR feedback

* Fixed all toolbar options for tags

* Address PR comments

* Ensure style is kept and not escaped

* Add all markdown toolbar action tests

* Style text edge case fix

* Address repeat function and type comment

* Add a more clarifying test
This commit is contained in:
Vasu Bhog
2020-11-09 18:26:28 -08:00
committed by GitHub
parent 689c7ab27e
commit 7cd4964f35
2 changed files with 118 additions and 1 deletions

View File

@@ -39,6 +39,7 @@ export class HTMLMarkdownConverter {
this.turndownService.addRule('span', {
filter: 'span',
replacement: function (content, node) {
let escapedText = escapeAngleBrackets(node.textContent);
// There are certain properties that either don't have equivalents in markdown or whose transformations
// don't have actions defined in WYSIWYG yet. To unblock users, leaving these elements alone (including their child elements)
// Note: the initial list was generated from our TSG Jupyter Book
@@ -74,7 +75,7 @@ export class HTMLMarkdownConverter {
beginString = '<u>' + beginString;
endString += '</u>';
}
return beginString + content + endString;
return beginString + escapedText + endString;
}
});
this.turndownService.addRule('img', {
@@ -99,6 +100,7 @@ export class HTMLMarkdownConverter {
const notebookLink = node.href ? URI.parse(node.href) : URI.file(node.title);
const notebookFolder = this.notebookUri ? path.join(path.dirname(this.notebookUri.fsPath), path.sep) : '';
let relativePath = findPathRelativeToContent(notebookFolder, notebookLink);
node.innerText = escapeAngleBrackets(node.innerText);
if (relativePath) {
return `[${node.innerText}](${relativePath})`;
}
@@ -112,6 +114,7 @@ export class HTMLMarkdownConverter {
.replace(/^\n+/, '') // remove leading newlines
.replace(/\n+$/, '\n') // replace trailing newlines with just a single one
.replace(/\n/gm, '\n '); // indent
content = escapeAngleBrackets(content);
let prefix = options.bulletListMarker + ' ';
let parent = node.parentNode;
let nestedCount = 0;
@@ -131,6 +134,72 @@ export class HTMLMarkdownConverter {
);
}
});
// Paragraph rule: escapes literal angle brackets in each direct child of the <p>,
// then emits the paragraph's resulting inner HTML as its own block.
this.turndownService.addRule('p', {
	filter: 'p',
	replacement: function (content, node) {
		node.childNodes.forEach(c => {
			switch (c.nodeType) {
				case Node.TEXT_NODE:
					// Plain text child: rewrite its value in place.
					c.nodeValue = escapeAngleBrackets(c.textContent);
					break;
				case Node.ELEMENT_NODE:
					// Element child: its content is replaced by the escaped text of the element.
					c.innerText = escapeAngleBrackets(c.textContent);
					break;
			}
		});
		// Serialising turns '<' / '>' back into entities; restore them and drop '&nbsp;' entities.
		const serialized = node.innerHTML
			.replace(/&lt;/gi, '<')
			.replace(/&gt;/gi, '>')
			.replace(/&nbsp;/gi, '');
		return '\n\n' + serialized + '\n\n';
	}
});
// Heading rule: converts <h1>-<h6> to markdown headings, escaping literal angle brackets
// in the heading text. Levels 1 and 2 use setext style when options.headingStyle is
// 'setext'; every other case uses ATX ('#') style.
this.turndownService.addRule('heading', {
	filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
	replacement: function (content, node, options) {
		// The character after the leading 'h' of the node name is the heading level.
		const hLevel = Number(node.nodeName.charAt(1));
		const escapedText = escapeAngleBrackets(content);
		if (options.headingStyle === 'setext' && hLevel < 3) {
			// Setext headings are underlined with '=' (level 1) or '-' (level 2);
			// a line of '#' characters is not a setext underline.
			const underline = (hLevel === 1 ? '=' : '-').repeat(escapedText.length);
			return '\n\n' + escapedText + '\n' + underline + '\n\n';
		}
		// ATX style: one '#' per heading level.
		return '\n\n' + '#'.repeat(hLevel) + ' ' + escapedText + '\n\n';
	}
});
// Bold rule: wraps the angle-bracket-escaped content of <strong>/<b> in the
// configured strong delimiter; whitespace-only content produces no output.
this.turndownService.addRule('bold', {
	filter: ['strong', 'b'],
	replacement: (content, node, options) => {
		const escaped = escapeAngleBrackets(content);
		if (escaped.trim() === '') {
			return '';
		}
		return options.strongDelimiter + escaped + options.strongDelimiter;
	}
});
// Italic rule: wraps the angle-bracket-escaped content of <em>/<i> in the
// configured emphasis delimiter; whitespace-only content produces no output.
this.turndownService.addRule('italicize', {
	filter: ['em', 'i'],
	replacement: (content, node, options) => {
		const escaped = escapeAngleBrackets(content);
		const isBlank = escaped.trim().length === 0;
		return isBlank ? '' : `${options.emDelimiter}${escaped}${options.emDelimiter}`;
	}
});
// Inline-code rule: applies to every <code> element except one that is the only
// child of a <pre>, and emits its content between backtick delimiters.
this.turndownService.addRule('code', {
	filter: node => {
		const siblingExists = node.previousSibling || node.nextSibling;
		const soleChildOfPre = node.parentNode.nodeName === 'PRE' && !siblingExists;
		return node.nodeName === 'CODE' && !soleChildOfPre;
	},
	replacement: content => {
		// NOTE(review): backslash escapes are literal inside markdown code spans, so the
		// escaped brackets presumably render with a visible backslash - confirm this is intended.
		const text = escapeAngleBrackets(content);
		if (text.trim() === '') {
			return '';
		}
		const backtickRuns = text.match(/`+/gm);
		let fence = '`';
		let padBefore = '';
		let padAfter = '';
		if (backtickRuns) {
			// Content that starts or ends with a backtick needs a space to separate it from the fence.
			padBefore = /^`/.test(text) ? ' ' : '';
			padAfter = /`$/.test(text) ? ' ' : '';
			// Lengthen the fence until it differs from every backtick run inside the content.
			while (backtickRuns.indexOf(fence) !== -1) {
				fence += '`';
			}
		}
		return fence + padBefore + text + padAfter + fence;
	}
});
}
}
@@ -150,3 +219,16 @@ export function findPathRelativeToContent(notebookFolder: string, contentPath: U
}
return '';
}
/**
 * Backslash-escapes every `<` and `>` in the given text, so that a non-HTML "tag"
 * such as `<test>` becomes `\<test\>`.
 *
 * The text is returned untouched when it contains `<u>` or `<mark>`, or when it contains
 * the substring `style` without also containing `<style>`; this keeps underline, highlight
 * and inline-style markup from being escaped.
 *
 * @param textContent The text to escape. A `null`/`undefined` value (DOM `textContent`
 * may be null) is treated as empty text.
 * @returns The escaped text, the unchanged input when a pass-through marker is present,
 * or `''` when no text was supplied.
 */
export function escapeAngleBrackets(textContent: string | null | undefined): string {
	// Guard against a missing value instead of throwing on `.includes`.
	if (textContent === null || textContent === undefined) {
		return '';
	}
	if (textContent.includes('<u>') || textContent.includes('<mark>') || (textContent.includes('style') && !textContent.includes('<style>'))) {
		return textContent;
	}
	// Prefix each angle bracket with a single backslash.
	return textContent.replace(/[<>]/g, bracket => '\\' + bracket);
}

View File

@@ -138,6 +138,7 @@ suite('HTML Markdown Converter', function (): void {
htmlString = '<a href="http://www.microsoft.com/images/msft.png">msft</a>';
assert.equal(htmlMarkdownConverter.convert(htmlString), '[msft](http://www.microsoft.com/images/msft.png)', 'Basic http link test failed');
});
test('Should transform <li> tags', () => {
htmlString = '<ul><li>Test</li></ul>';
assert.equal(htmlMarkdownConverter.convert(htmlString), `- Test`, 'Basic unordered list test failed');
@@ -153,6 +154,40 @@ suite('HTML Markdown Converter', function (): void {
assert.equal(htmlMarkdownConverter.convert(htmlString), `1. Test\n 1. Test2\n2. Test3`, 'Basic ordered item test failed');
});
// Verifies how literal "<...>" text (entered as &lt; / &gt; entities) survives conversion
// in plain text and inside each supported formatting element.
// Fixtures use well-formed HTML: closing heading tags and semicolon-terminated entities.
test('Should keep < > tag', () => {
	// Plain text and text next to a styled span.
	htmlString = '&lt;test&gt;';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '<test>', 'Non-HTML tag test failed to escape');
	htmlString = '&lt;test&gt;<span style="background:red">message</span>&lt;test&gt;';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '<test><span style="background:red">message</span><test>', 'Non-HTML tag inside span tag test failed to escape');

	// Headings.
	htmlString = '<h1>&lt;test&gt;</h1>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '# \\<test\\>', 'Non-HTML tag inside H1 tag test failed to escape');
	htmlString = '<h2>&lt;test&gt;</h2>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '## \\<test\\>', 'Non-HTML tag inside H2 tag test failed to escape');
	htmlString = '<h3>&lt;test&gt;</h3>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '### \\<test\\>', 'Non-HTML tag inside H3 tag test failed to escape');

	// Links and inline formatting.
	htmlString = '<a href="https://www.microsoft.com/images/msft.png">&lt;msft&gt;</a>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '[\\<msft\\>](https://www.microsoft.com/images/msft.png)', 'Non-HTML tag as link test failed to escape');
	htmlString = '<strong>&lt;Bold test&gt;</strong>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '**\\<Bold test\\>**', 'Basic bold non-HTML tag test failed to escape');
	htmlString = '<em>&lt;Italicize test&gt;</em>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '_\\<Italicize test\\>_', 'Basic italicize non-HTML tag test failed to escape');
	htmlString = '<u>&lt;Underline_test&gt;</u> ';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '<u>&lt;Underline_test&gt;</u>', 'Basic underline non-HTML tag test failed to escape');

	// Lists.
	htmlString = '<ul><li>&lt;test&gt;</li></ul>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '- \\<test\\>', 'Basic unordered list non-HTML tag item test failed to escape');
	htmlString = '<ol><li>&lt;test&gt;</li></ol>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '1. \\<test\\>', 'Basic ordered list non-HTML tag item test failed to escape');

	// Highlighting is kept as HTML, including its entities.
	htmlString = '<mark>&lt;test&gt;</mark>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '<mark>&lt;test&gt;</mark>', 'Basic highlighting Non-HTML tag test failed to escape');
	htmlString = '<mark><h1>&lt;test&gt;</h1></mark>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '<mark><h1>&lt;test&gt;</h1></mark>', 'Non-HTML tag inside multiple html tags test failed to escape');

	// The word "style" as a literal tag, and as plain text next to underlined content.
	htmlString = '<p>&lt;style&gt;</p>';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '\\<style\\>', 'Style tag as a non-HTML tag test failed to escape');
	htmlString = '&lt;test&gt; <u>Underlined Text style</u> end';
	assert.equal(htmlMarkdownConverter.convert(htmlString), '<test> <u>Underlined Text style</u> end', 'Non-HTML tag outside with style and underline test failed to escape');
});
test('Should transform table with no header', () => {
htmlString = '<table>\n<thead>\n<tr>\n<th></th>\n<th></th>\n<th></th>\n</tr>\n</thead>\n<tbody><tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n<tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n<tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n<tr>\n<td>test</td>\n<td>test</td>\n<td>test</td>\n</tr>\n</tbody></table>\n';
assert.equal(htmlMarkdownConverter.convert(htmlString), `| | | |\n| --- | --- | --- |\n| test | test | test |\n| test | test | test |\n| test | test | test |\n| test | test | test |`, 'Table with no header failed');