fix: non-printable characters in XML (#6952)

* fix: non-printable characters in XML

* fix: PR comments

* chore: format

* chore: move to module-level parser and serializer

* chore: reorganize textToDom

* chore: add dummy implementations of domParser and xmlSerializer

* chore: properly check classes before constructing

* chore: fix tests

* chore: PR comments

* chore: remove null char from tests

* chore: docs!
This commit is contained in:
Beka Westberg
2023-04-17 16:05:09 -07:00
committed by GitHub
parent 20f0c937cd
commit edc5843c4c
3 changed files with 125 additions and 22 deletions

View File

@@ -10,6 +10,22 @@ goog.declareModuleId('Blockly.utils.xml');
import * as deprecation from './deprecation.js';
/**
 * Module-level DOMParser instance.
 *
 * It starts out as a stand-in whose only method throws, so that parsing
 * before a real DOMParser has been assigned fails with an explicit message
 * rather than an obscure error.
 */
let domParser: DOMParser = {
  parseFromString: () => {
    const reason =
        'DOMParser was not found in the global scope and was not properly ' +
        'injected using injectDependencies';
    throw new Error(reason);
  },
};
/**
 * Module-level XMLSerializer instance.
 *
 * It starts out as a stand-in whose only method throws, so that serializing
 * before a real XMLSerializer has been assigned fails with an explicit
 * message rather than an obscure error.
 */
let xmlSerializer: XMLSerializer = {
  serializeToString: function() {
    throw new Error(
        'XMLSerializer was not found in the global scope and was not ' +
        'properly injected using injectDependencies');
  },
};
/**
* Injected dependencies. By default these are just (and have the
* same types as) the corresponding DOM Window properties, but the
@@ -18,6 +34,8 @@ import * as deprecation from './deprecation.js';
* package instead.
*/
let {document, DOMParser, XMLSerializer} = globalThis;
// When the global scope provides the classes, construct the shared parser and
// serializer right away; otherwise the throwing placeholders stay in place.
if (DOMParser) domParser = new DOMParser();
if (XMLSerializer) xmlSerializer = new XMLSerializer();
/**
* Inject implementations of document, DOMParser and/or XMLSerializer
@@ -50,6 +68,9 @@ export function injectDependencies(dependencies: {
DOMParser = DOMParser,
XMLSerializer = XMLSerializer,
} = dependencies);
domParser = new DOMParser();
xmlSerializer = new XMLSerializer();
}
/**
@@ -57,6 +78,9 @@ export function injectDependencies(dependencies: {
*/
export const NAME_SPACE = 'https://developers.google.com/blockly/xml';
// Matches the C0 control characters U+0000-U+001F except line feed (U+000A)
// and carriage return (U+000D). Note that tab (U+0009) is included. The `g`
// flag makes String.prototype.replace substitute every occurrence.
// eslint-disable-next-line no-control-regex
const INVALID_CONTROL_CHARS = /[\x00-\x09\x0B\x0C\x0E-\x1F]/g;
/**
* Get the document object to use for XML serialization.
*
@@ -102,18 +126,35 @@ export function createTextNode(text: string): Text {
/**
* Converts an XML string into a DOM structure.
*
* Control characters should be escaped. (But we will try to best-effort parse
* unescaped characters.)
*
* Note that even when escaped, U+0000 will be parsed as U+FFFD (the
* "replacement character") because U+0000 is never a valid XML character
* (even in XML 1.1).
* https://www.w3.org/TR/xml11/#charsets
*
* @param text An XML string.
* @returns A DOM object representing the singular child of the document
* element.
* @throws if the text doesn't parse.
*/
export function textToDom(text: string): Element {
const doc = textToDomDocument(text);
if (!doc || !doc.documentElement ||
doc.getElementsByTagName('parsererror').length) {
throw Error('textToDom was unable to parse: ' + text);
let doc = domParser.parseFromString(text, 'text/xml');
if (doc && doc.documentElement &&
!doc.getElementsByTagName('parsererror').length) {
return doc.documentElement;
}
return doc.documentElement;
// Attempt to parse as HTML to deserialize control characters that were
// serialized before the serializer did proper escaping.
doc = domParser.parseFromString(text, 'text/html');
if (doc && doc.body.firstChild &&
doc.body.firstChild.nodeName.toLowerCase() === 'xml') {
return doc.body.firstChild as Element;
}
throw new Error(`DOMParser was unable to parse: ${text}`);
}
/**
@@ -124,18 +165,30 @@ export function textToDom(text: string): Element {
* @throws if XML doesn't parse.
*/
export function textToDomDocument(text: string): Document {
const oParser = new DOMParser();
return oParser.parseFromString(text, 'text/xml');
deprecation.warn(
'Blockly.utils.xml.textToDomDocument', 'version 10', 'version 11');
return domParser.parseFromString(text, 'text/xml');
}
/**
* Converts a DOM structure into plain text.
* Currently the text format is fairly ugly: all one line with no whitespace.
*
* Control characters are escaped using their decimal encodings. This includes
* U+0000 even though it is technically never a valid XML character (even in
* XML 1.1).
* https://www.w3.org/TR/xml11/#charsets
*
* When decoded U+0000 will be parsed as U+FFFD (the "replacement character").
*
* @param dom A tree of XML nodes.
* @returns Text representation.
*/
export function domToText(dom: Node): string {
const oSerializer = new XMLSerializer();
return oSerializer.serializeToString(dom);
return sanitizeText(xmlSerializer.serializeToString(dom));
}
/**
 * Replaces every character matched by INVALID_CONTROL_CHARS with its decimal
 * numeric character reference (for example U+0001 becomes `&#1;`).
 *
 * @param text Serialized XML text.
 * @returns The text with the matched characters escaped.
 */
function sanitizeText(text: string) {
  const toDecimalReference = (char: string) => `&#${char.charCodeAt(0)};`;
  return text.replace(INVALID_CONTROL_CHARS, toDecimalReference);
}

View File

@@ -370,7 +370,7 @@ Serializer.Fields.LabelSerializable.ControlChars = new SerializerTestCase(
'ControlChars',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<block type="test_fields_label_serializable" id="id******************" x="42" y="42">' +
'<field name="LABEL">&#x01;&#xa1;</field>' +
'<field name="LABEL">&#01;&#a1;</field>' +
'</block>' +
'</xml>');
Serializer.Fields.LabelSerializable.testCases = [
@@ -416,7 +416,7 @@ Serializer.Fields.MultilineInput.Tabs = new SerializerTestCase(
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<block type="test_fields_multilinetext" id="id******************" x="42" y="42">' +
'<field name="CODE">' +
'line1&amp;#10; line2&amp;#10; line3' +
'line1&amp;#10;&amp;#x9line2&amp;#10;&amp;#x9line3' +
'</field>' +
'</block>' +
'</xml>');
@@ -487,7 +487,7 @@ Serializer.Fields.MultilineInput.ControlChars = new SerializerTestCase(
'ControlChars',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<block type="test_fields_multilinetext" id="id******************" x="42" y="42">' +
'<field name="CODE">&#x01;&#xa1;</field>' +
'<field name="CODE">&#01;&#a1;</field>' +
'</block>' +
'</xml>');
Serializer.Fields.MultilineInput.testCases = [
@@ -588,7 +588,7 @@ Serializer.Fields.TextInput.Simple = new SerializerTestCase('Simple',
Serializer.Fields.TextInput.Tabs = new SerializerTestCase('Tabs',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<block type="test_fields_text_input" id="id******************" x="42" y="42">' +
'<field name="TEXT_INPUT">line1 line2 line3</field>' +
'<field name="TEXT_INPUT">line1&amp;#x9line2&amp;#x9line3</field>' +
'</block>' +
'</xml>');
/* eslint-enable no-tabs */
@@ -658,7 +658,7 @@ Serializer.Fields.TextInput.ControlChars = new SerializerTestCase(
'ControlChars',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<block type="test_fields_text_input" id="id******************" x="42" y="42">' +
'<field name="TEXT_INPUT">&#x01;&#xa1;</field>' +
'<field name="TEXT_INPUT">&#01;&#a1;</field>' +
'</block>' +
'</xml>');
Serializer.Fields.TextInput.testCases = [
@@ -708,10 +708,10 @@ Serializer.Fields.Variable.Types = new SerializerTestCase('Types',
Serializer.Fields.Variable.Tabs = new SerializerTestCase('Tabs',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<variables>' +
'<variable id="aaaaaaaaaaaaaaaaaaaa">line1 line2 line3</variable>' +
'<variable id="aaaaaaaaaaaaaaaaaaaa">line1&amp;#x9line2&amp;#x9line3</variable>' +
'</variables>' +
'<block type="variables_get" id="id******************" x="42" y="42">' +
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">line1 line2 line3</field>' +
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">line1&amp;#x9line2&amp;#x9line3</field>' +
'</block>' +
'</xml>');
/* eslint-enable no-tabs */
@@ -808,10 +808,10 @@ Serializer.Fields.Variable.ControlChars = new SerializerTestCase(
'ControlChars',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<variables>' +
'<variable id="aaaaaaaaaaaaaaaaaaaa">&#x01;&#xa1;</variable>' +
'<variable id="aaaaaaaaaaaaaaaaaaaa">&#01;&#a1;</variable>' +
'</variables>' +
'<block type="variables_get" id="id******************" x="42" y="42">' +
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">&#x01;&#xa1;</field>' +
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">&#01;&#a1;</field>' +
'</block>' +
'</xml>');
Serializer.Fields.Variable.testCases = [
@@ -1047,7 +1047,7 @@ Serializer.Icons.Comment.Text.ControlChars = new SerializerTestCase(
'ControlChars',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<block type="logic_negate" id="id******************" x="42" y="42">' +
'<comment pinned="false" h="80" w="160">&#x01;&#xa1;</comment>' +
'<comment pinned="false" h="80" w="160">&#01;&#a1;</comment>' +
'</block>' +
'</xml>');
Serializer.Icons.Comment.Text.testCases = [
@@ -1804,7 +1804,7 @@ Serializer.Mutations.Procedure.Names.ControlChars = new SerializerTestCase(
'ControlChars',
'<xml xmlns="https://developers.google.com/blockly/xml">' +
'<block type="procedures_defreturn" id="id******************" x="42" y="42">' +
'<field name="NAME">&#x01;&#xa1;</field>' +
'<field name="NAME">&#01;&#a1;</field>' +
'</block>' +
'</xml>');
Serializer.Mutations.Procedure.Names.testCases = [

View File

@@ -33,6 +33,9 @@ suite('XML', function() {
chai.assert.equal(fieldDom.getAttribute('id'), id);
chai.assert.equal(fieldDom.textContent, text);
};
/** Asserts that the given node is named `xml`, ignoring letter case. */
const assertXmlDoc = (doc) => {
  const tagName = doc.nodeName.toLowerCase();
  chai.assert.equal(tagName, 'xml', 'XML tag');
};
setup(function() {
sharedTestSetup.call(this);
Blockly.defineBlocksWithJsonArray([
@@ -73,13 +76,40 @@ suite('XML', function() {
teardown(function() {
sharedTestTeardown.call(this);
});
suite('textToDom', function() {
test('Basic', function() {
const dom = Blockly.utils.xml.textToDom(this.complexXmlText);
chai.assert.equal(dom.nodeName, 'xml', 'XML tag');
chai.assert.equal(dom.getElementsByTagName('block').length, 6, 'Block tags');
assertXmlDoc(dom);
chai.assert.equal(
dom.getElementsByTagName('block').length, 6, 'Block tags');
});
// Hex numeric character references for U+0001, tab (U+0009) and U+001F
// should come back as the raw characters in the parsed text node.
test(
'text with hex-encoded NCR Control characters are properly ' +
'deserialized',
function() {
const dom = Blockly.utils.xml.textToDom('<xml>&#x1;&#x9;&#x1F;</xml>');
assertXmlDoc(dom);
chai.assert.equal(dom.firstChild.textContent, '\u0001\t\u001f');
});
// Decimal numeric character references for U+0001, tab (U+0009) and U+001F
// should come back as the raw characters in the parsed text node. Every
// reference is terminated with ';' so the input is a well-formed NCR
// sequence, matching the hex-encoded variant of this test.
test(
    'text with dec-encoded NCR Control characters are properly ' +
    'deserialized',
    function() {
      const dom = Blockly.utils.xml.textToDom('<xml>&#1;&#9;&#31;</xml>');
      assertXmlDoc(dom);
      chai.assert.equal(dom.firstChild.textContent, '\u0001\u0009\u001f');
    });
// The predefined entity &amp; should be decoded to a literal '&' in the
// parsed text node.
test('text with an escaped ampersand is properly deserialized', function() {
const dom = Blockly.utils.xml.textToDom('<xml>&amp;</xml>');
assertXmlDoc(dom);
chai.assert.equal(dom.firstChild.textContent, '&');
});
});
suite('blockToDom', function() {
setup(function() {
this.workspace = new Blockly.Workspace();
@@ -433,6 +463,7 @@ suite('XML', function() {
chai.assert.equal(resultDom.children.length, 0);
});
});
suite('domToText', function() {
test('Round tripping', function() {
const dom = Blockly.utils.xml.textToDom(this.complexXmlText);
@@ -440,7 +471,26 @@ suite('XML', function() {
chai.assert.equal(text.replace(/\s+/g, ''),
this.complexXmlText.replace(/\s+/g, ''), 'Round trip');
});
// A text node holding U+0001 must be serialized as the decimal character
// reference &#1;.
test('control characters are escaped', function() {
  const dom = Blockly.utils.xml.createElement('xml');
  // Spell the control character as an escape sequence: a raw U+0001 in the
  // source is invisible and easily lost, which leaves an empty text node.
  dom.appendChild(Blockly.utils.xml.createTextNode('\u0001'));
  chai.assert.equal(
      Blockly.utils.xml.domToText(dom),
      '<xml xmlns="https://developers.google.com/blockly/xml">&#1;</xml>'
  );
});
// A literal '&' in a text node must be serialized as &amp;. This test goes
// through Blockly.Xml.domToText rather than Blockly.utils.xml.domToText.
test('ampersands are escaped', function() {
const dom = Blockly.utils.xml.createElement('xml');
dom.appendChild(Blockly.utils.xml.createTextNode('&'));
chai.assert.equal(
Blockly.Xml.domToText(dom),
'<xml xmlns="https://developers.google.com/blockly/xml">&amp;</xml>'
);
});
});
suite('domToPrettyText', function() {
test('Round tripping', function() {
const dom = Blockly.utils.xml.textToDom(this.complexXmlText);