mirror of
https://github.com/google/blockly.git
synced 2026-01-04 15:40:08 +01:00
fix: non-printable characters in XML (#6952)
* fix: non-printable characters in XML * fix: PR comments * chore: format * chore: move to module-level parser and serializer * chore: reorganize textToDom * chore: add dummy implementations of domParser and xmlSerializer * chore: properly check classes before constructing * chore: fix tests * chore: PR comments * chore: remove null char from tests * chore: docs!
This commit is contained in:
@@ -10,6 +10,22 @@ goog.declareModuleId('Blockly.utils.xml');
|
||||
import * as deprecation from './deprecation.js';
|
||||
|
||||
|
||||
/**
 * Module-level parser shared by the text-parsing helpers in this file
 * (textToDom and textToDomDocument call domParser.parseFromString).
 *
 * It starts out as a placeholder whose parseFromString always throws. It is
 * replaced with `new DOMParser()` when a DOMParser constructor is found on
 * globalThis, and again whenever injectDependencies is called.
 */
let domParser: DOMParser = {
|
||||
parseFromString: function() {
|
||||
throw new Error(
|
||||
'DOMParser was not found in the global scope and was not properly ' +
|
||||
'injected using injectDependencies');
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Module-level serializer used by domToText.
 *
 * It starts out as a placeholder whose serializeToString always throws. It is
 * replaced with `new XMLSerializer()` when an XMLSerializer constructor is
 * found on globalThis, and again whenever injectDependencies is called.
 */
let xmlSerializer: XMLSerializer = {
  serializeToString: function() {
    // Reaching this means no real serializer was ever installed.
    throw new Error(
        'XMLSerializer was not found in the global scope and was not ' +
        'properly injected using injectDependencies');
  },
};
|
||||
|
||||
/**
|
||||
* Injected dependencies. By default these are just (and have the
|
||||
* same types as) the corresponding DOM Window properties, but the
|
||||
@@ -18,6 +34,8 @@ import * as deprecation from './deprecation.js';
|
||||
* package instead.
|
||||
*/
|
||||
let {document, DOMParser, XMLSerializer} = globalThis;
|
||||
if (DOMParser) domParser = new DOMParser();
|
||||
if (XMLSerializer) xmlSerializer = new XMLSerializer();
|
||||
|
||||
/**
|
||||
* Inject implementations of document, DOMParser and/or XMLSerializer
|
||||
@@ -50,6 +68,9 @@ export function injectDependencies(dependencies: {
|
||||
DOMParser = DOMParser,
|
||||
XMLSerializer = XMLSerializer,
|
||||
} = dependencies);
|
||||
|
||||
domParser = new DOMParser();
|
||||
xmlSerializer = new XMLSerializer();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -57,6 +78,9 @@ export function injectDependencies(dependencies: {
|
||||
*/
|
||||
export const NAME_SPACE = 'https://developers.google.com/blockly/xml';
|
||||
|
||||
// eslint-disable-next-line no-control-regex
|
||||
const INVALID_CONTROL_CHARS = /[\x00-\x09\x0B\x0C\x0E-\x1F]/g;
|
||||
|
||||
/**
|
||||
* Get the document object to use for XML serialization.
|
||||
*
|
||||
@@ -102,18 +126,35 @@ export function createTextNode(text: string): Text {
|
||||
/**
|
||||
* Converts an XML string into a DOM structure.
|
||||
*
|
||||
* Control characters should be escaped. (But we will try to best-effort parse
|
||||
* unescaped characters.)
|
||||
*
|
||||
* Note that even when escaped, U+0000 will be parsed as U+FFFD (the
|
||||
* "replacement character") because U+0000 is never a valid XML character
|
||||
* (even in XML 1.1).
|
||||
* https://www.w3.org/TR/xml11/#charsets
|
||||
*
|
||||
* @param text An XML string.
|
||||
* @returns A DOM object representing the singular child of the document
|
||||
* element.
|
||||
* @throws if the text doesn't parse.
|
||||
*/
|
||||
export function textToDom(text: string): Element {
|
||||
const doc = textToDomDocument(text);
|
||||
if (!doc || !doc.documentElement ||
|
||||
doc.getElementsByTagName('parsererror').length) {
|
||||
throw Error('textToDom was unable to parse: ' + text);
|
||||
let doc = domParser.parseFromString(text, 'text/xml');
|
||||
if (doc && doc.documentElement &&
|
||||
!doc.getElementsByTagName('parsererror').length) {
|
||||
return doc.documentElement;
|
||||
}
|
||||
return doc.documentElement;
|
||||
|
||||
// Attempt to parse as HTML to deserialize control characters that were
|
||||
// serialized before the serializer did proper escaping.
|
||||
doc = domParser.parseFromString(text, 'text/html');
|
||||
if (doc && doc.body.firstChild &&
|
||||
doc.body.firstChild.nodeName.toLowerCase() === 'xml') {
|
||||
return doc.body.firstChild as Element;
|
||||
}
|
||||
|
||||
throw new Error(`DOMParser was unable to parse: ${text}`);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -124,18 +165,30 @@ export function textToDom(text: string): Element {
|
||||
* @throws if XML doesn't parse.
|
||||
*/
|
||||
export function textToDomDocument(text: string): Document {
|
||||
const oParser = new DOMParser();
|
||||
return oParser.parseFromString(text, 'text/xml');
|
||||
deprecation.warn(
|
||||
'Blockly.utils.xml.textToDomDocument', 'version 10', 'version 11');
|
||||
return domParser.parseFromString(text, 'text/xml');
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a DOM structure into plain text.
|
||||
* Currently the text format is fairly ugly: all one line with no whitespace.
|
||||
*
|
||||
* Control characters are escaped using their decimal encodings. This includes
|
||||
* U+0000 even though it is technically never a valid XML character (even in
|
||||
* XML 1.1).
|
||||
* https://www.w3.org/TR/xml11/#charsets
|
||||
*
|
||||
* When decoded U+0000 will be parsed as U+FFFD (the "replacement character").
|
||||
*
|
||||
* @param dom A tree of XML nodes.
|
||||
* @returns Text representation.
|
||||
*/
|
||||
export function domToText(dom: Node): string {
|
||||
const oSerializer = new XMLSerializer();
|
||||
return oSerializer.serializeToString(dom);
|
||||
return sanitizeText(xmlSerializer.serializeToString(dom));
|
||||
}
|
||||
|
||||
/**
 * Replaces every character matched by INVALID_CONTROL_CHARS with its decimal
 * numeric character reference, e.g. U+0001 becomes `&#1;`.
 *
 * @param text The string to escape; domToText passes the serializer output.
 * @returns The text with the matched control characters escaped.
 */
function sanitizeText(text: string) {
|
||||
return text.replace(
|
||||
INVALID_CONTROL_CHARS, (match) => `&#${match.charCodeAt(0)};`);
|
||||
}
|
||||
|
||||
@@ -370,7 +370,7 @@ Serializer.Fields.LabelSerializable.ControlChars = new SerializerTestCase(
|
||||
'ControlChars',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<block type="test_fields_label_serializable" id="id******************" x="42" y="42">' +
|
||||
'<field name="LABEL">¡</field>' +
|
||||
'<field name="LABEL">&#a1;</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
Serializer.Fields.LabelSerializable.testCases = [
|
||||
@@ -416,7 +416,7 @@ Serializer.Fields.MultilineInput.Tabs = new SerializerTestCase(
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<block type="test_fields_multilinetext" id="id******************" x="42" y="42">' +
|
||||
'<field name="CODE">' +
|
||||
'line1&#10; line2&#10; line3' +
|
||||
'line1&#10;&#x9line2&#10;&#x9line3' +
|
||||
'</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
@@ -487,7 +487,7 @@ Serializer.Fields.MultilineInput.ControlChars = new SerializerTestCase(
|
||||
'ControlChars',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<block type="test_fields_multilinetext" id="id******************" x="42" y="42">' +
|
||||
'<field name="CODE">¡</field>' +
|
||||
'<field name="CODE">&#a1;</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
Serializer.Fields.MultilineInput.testCases = [
|
||||
@@ -588,7 +588,7 @@ Serializer.Fields.TextInput.Simple = new SerializerTestCase('Simple',
|
||||
Serializer.Fields.TextInput.Tabs = new SerializerTestCase('Tabs',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<block type="test_fields_text_input" id="id******************" x="42" y="42">' +
|
||||
'<field name="TEXT_INPUT">line1 line2 line3</field>' +
|
||||
'<field name="TEXT_INPUT">line1&#x9line2&#x9line3</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
/* eslint-enable no-tabs */
|
||||
@@ -658,7 +658,7 @@ Serializer.Fields.TextInput.ControlChars = new SerializerTestCase(
|
||||
'ControlChars',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<block type="test_fields_text_input" id="id******************" x="42" y="42">' +
|
||||
'<field name="TEXT_INPUT">¡</field>' +
|
||||
'<field name="TEXT_INPUT">&#a1;</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
Serializer.Fields.TextInput.testCases = [
|
||||
@@ -708,10 +708,10 @@ Serializer.Fields.Variable.Types = new SerializerTestCase('Types',
|
||||
Serializer.Fields.Variable.Tabs = new SerializerTestCase('Tabs',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<variables>' +
|
||||
'<variable id="aaaaaaaaaaaaaaaaaaaa">line1 line2 line3</variable>' +
|
||||
'<variable id="aaaaaaaaaaaaaaaaaaaa">line1&#x9line2&#x9line3</variable>' +
|
||||
'</variables>' +
|
||||
'<block type="variables_get" id="id******************" x="42" y="42">' +
|
||||
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">line1 line2 line3</field>' +
|
||||
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">line1&#x9line2&#x9line3</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
/* eslint-enable no-tabs */
|
||||
@@ -808,10 +808,10 @@ Serializer.Fields.Variable.ControlChars = new SerializerTestCase(
|
||||
'ControlChars',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<variables>' +
|
||||
'<variable id="aaaaaaaaaaaaaaaaaaaa">¡</variable>' +
|
||||
'<variable id="aaaaaaaaaaaaaaaaaaaa">&#a1;</variable>' +
|
||||
'</variables>' +
|
||||
'<block type="variables_get" id="id******************" x="42" y="42">' +
|
||||
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">¡</field>' +
|
||||
'<field name="VAR" id="aaaaaaaaaaaaaaaaaaaa">&#a1;</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
Serializer.Fields.Variable.testCases = [
|
||||
@@ -1047,7 +1047,7 @@ Serializer.Icons.Comment.Text.ControlChars = new SerializerTestCase(
|
||||
'ControlChars',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<block type="logic_negate" id="id******************" x="42" y="42">' +
|
||||
'<comment pinned="false" h="80" w="160">¡</comment>' +
|
||||
'<comment pinned="false" h="80" w="160">&#a1;</comment>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
Serializer.Icons.Comment.Text.testCases = [
|
||||
@@ -1804,7 +1804,7 @@ Serializer.Mutations.Procedure.Names.ControlChars = new SerializerTestCase(
|
||||
'ControlChars',
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">' +
|
||||
'<block type="procedures_defreturn" id="id******************" x="42" y="42">' +
|
||||
'<field name="NAME">¡</field>' +
|
||||
'<field name="NAME">&#a1;</field>' +
|
||||
'</block>' +
|
||||
'</xml>');
|
||||
Serializer.Mutations.Procedure.Names.testCases = [
|
||||
|
||||
@@ -33,6 +33,9 @@ suite('XML', function() {
|
||||
chai.assert.equal(fieldDom.getAttribute('id'), id);
|
||||
chai.assert.equal(fieldDom.textContent, text);
|
||||
};
|
||||
const assertXmlDoc = function(doc) {
|
||||
chai.assert.equal(doc.nodeName.toLowerCase(), 'xml', 'XML tag');
|
||||
};
|
||||
setup(function() {
|
||||
sharedTestSetup.call(this);
|
||||
Blockly.defineBlocksWithJsonArray([
|
||||
@@ -73,13 +76,40 @@ suite('XML', function() {
|
||||
teardown(function() {
|
||||
sharedTestTeardown.call(this);
|
||||
});
|
||||
|
||||
suite('textToDom', function() {
|
||||
test('Basic', function() {
|
||||
const dom = Blockly.utils.xml.textToDom(this.complexXmlText);
|
||||
chai.assert.equal(dom.nodeName, 'xml', 'XML tag');
|
||||
chai.assert.equal(dom.getElementsByTagName('block').length, 6, 'Block tags');
|
||||
assertXmlDoc(dom);
|
||||
chai.assert.equal(
|
||||
dom.getElementsByTagName('block').length, 6, 'Block tags');
|
||||
});
|
||||
|
||||
test(
|
||||
'text with hex-encoded NCR Control characters are properly ' +
|
||||
'deserialized',
|
||||
function() {
|
||||
const dom = Blockly.utils.xml.textToDom('<xml>&#x1;&#x9;&#x1F;</xml>');
|
||||
assertXmlDoc(dom);
|
||||
chai.assert.equal(dom.firstChild.textContent, '\u0001\t\u001f');
|
||||
});
|
||||
|
||||
test(
|
||||
'text with dec-encoded NCR Control characters are properly ' +
|
||||
'deserialized',
|
||||
function() {
|
||||
const dom = Blockly.utils.xml.textToDom('<xml>&#1;&#9;&#31;</xml>');
|
||||
assertXmlDoc(dom);
|
||||
chai.assert.equal(dom.firstChild.textContent, '\u0001\u0009\u001f');
|
||||
});
|
||||
|
||||
test('text with an escaped ampersand is properly deserialized', function() {
|
||||
const dom = Blockly.utils.xml.textToDom('<xml>&amp;</xml>');
|
||||
assertXmlDoc(dom);
|
||||
chai.assert.equal(dom.firstChild.textContent, '&');
|
||||
});
|
||||
});
|
||||
|
||||
suite('blockToDom', function() {
|
||||
setup(function() {
|
||||
this.workspace = new Blockly.Workspace();
|
||||
@@ -433,6 +463,7 @@ suite('XML', function() {
|
||||
chai.assert.equal(resultDom.children.length, 0);
|
||||
});
|
||||
});
|
||||
|
||||
suite('domToText', function() {
|
||||
test('Round tripping', function() {
|
||||
const dom = Blockly.utils.xml.textToDom(this.complexXmlText);
|
||||
@@ -440,7 +471,26 @@ suite('XML', function() {
|
||||
chai.assert.equal(text.replace(/\s+/g, ''),
|
||||
this.complexXmlText.replace(/\s+/g, ''), 'Round trip');
|
||||
});
|
||||
|
||||
test('control characters are escaped', function() {
|
||||
const dom = Blockly.utils.xml.createElement('xml');
|
||||
dom.appendChild(Blockly.utils.xml.createTextNode('\u0001')); // u0001
|
||||
chai.assert.equal(
|
||||
Blockly.utils.xml.domToText(dom),
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">&#1;</xml>'
|
||||
);
|
||||
});
|
||||
|
||||
test('ampersands are escaped', function() {
|
||||
const dom = Blockly.utils.xml.createElement('xml');
|
||||
dom.appendChild(Blockly.utils.xml.createTextNode('&'));
|
||||
chai.assert.equal(
|
||||
Blockly.Xml.domToText(dom),
|
||||
'<xml xmlns="https://developers.google.com/blockly/xml">&amp;</xml>'
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
suite('domToPrettyText', function() {
|
||||
test('Round tripping', function() {
|
||||
const dom = Blockly.utils.xml.textToDom(this.complexXmlText);
|
||||
|
||||
Reference in New Issue
Block a user