Skip to content
This repository was archived by the owner on Oct 12, 2021. It is now read-only.

Commit 6bd212a

Browse files
mgechev authored and jeffbcross committed
perf(AppShell): drop named entities from parse5 (#74)
1 parent b7abae4 commit 6bd212a

File tree

6 files changed

+115
-3
lines changed

6 files changed

+115
-3
lines changed

app-shell/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"pree2e": "webdriver-manager update",
1212
"e2e": "protractor",
1313
"clean": "rm -rf dist",
14-
"build": "ng build && tsc -p src/tsconfig.publish.es5.json && tsc -p src/tsconfig.publish.es6.json && cp src/package.json dist/app/package.json && browserify dist/app/shell-parser/index.js -s shellParserFactory > dist/app/shell-parser.js && rm -rf dist/app/shell-parser && rm -rf dist/app/vendor",
14+
"build": "ng build && tsc -p src/tsconfig.publish.es5.json && tsc -p src/tsconfig.publish.es6.json && cp src/package.json dist/app/package.json && echo '' > dist/vendor/parse5/lib/tokenizer/named_entity_trie.js && browserify dist/app/shell-parser/index.js -s shellParserFactory > dist/app/shell-parser.js && rm -rf dist/app/shell-parser && rm -rf dist/app/vendor",
1515
"build_publish": "npm run clean && npm run build"
1616
},
1717
"private": true,
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
declare var require: any;
2+
import {
3+
beforeEach,
4+
it,
5+
describe,
6+
expect,
7+
inject
8+
} from '@angular/core/testing';
9+
10+
import { Parse5TemplateParser } from './parse5-template-parser';
11+
12+
// Spec for the "drop named entities" patch: named character references
// (e.g. `&nbsp;`) must pass through parse and serialize untouched, both in
// text content and in attribute values.
describe('dropped named entities patch', () => {

  let parser: Parse5TemplateParser;

  beforeEach(() => {
    parser = new Parse5TemplateParser();
  });

  describe('parse', () => {

    it('should not modify character references', () => {
      const tree = parser.parse('<body>&nbsp;</body>');
      // The parsed document is <html><head></head><body>…</body></html>,
      // so <body> is the second child of the first root node.
      const body = <any>(tree.childNodes[0].childNodes[1]);
      expect(body.childNodes[0].value).toBe('&nbsp;');
    });

    it('should not modify character references in attribute values', () => {
      const tree = parser.parse('<body style="&quote;"></body>');
      const body = <any>(tree.childNodes[0].childNodes[1]);
      expect(body.attrs[0].value).toBe('&quote;');
    });

  });

  describe('serialize', () => {

    it('should serialize named entities properly', () => {
      const template = parser.serialize(parser.parse('<body>&nbsp;</body>'));
      expect(template).toBe('<html><head></head><body>&nbsp;</body></html>');
    });

    it('should serialize named entities in attributes properly', () => {
      const template = parser.serialize(parser.parse('<body style="&quote;">&nbsp;</body>'));
      expect(template).toBe('<html><head></head><body style="&quote;">&nbsp;</body></html>');
    });

  });

});
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
var Tokenizer = require('../../../../vendor/parse5/lib/tokenizer');
2+
var Serializer = require('../../../../vendor/parse5/lib/serializer');
3+
var CP = require('../../../../vendor/parse5/lib/common/unicode').CODE_POINTS;
4+
// Lookup used by the patched `_consumeCharacterReference`: keyed by the
// tokenizer's current (character-reference) state, it yields the state name
// passed to `_reconsumeInState` when a reference is left unresolved.
// NOTE(review): the attribute-value state also maps to 'DATA_STATE' — confirm
// that the tokenizer restores the attribute-value state afterwards.
var STATES_MAP: {[key: string]: string} = {
  CHARACTER_REFERENCE_IN_DATA_STATE: 'DATA_STATE',
  CHARACTER_REFERENCE_IN_RCDATA_STATE: 'RCDATA_STATE',
  CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE_STATE: 'DATA_STATE'
};
9+
10+
// True when the code point lies in the inclusive range DIGIT_0..DIGIT_9.
function isAsciiDigit(cp: number) {
  return CP.DIGIT_0 <= cp && cp <= CP.DIGIT_9;
}
13+
14+
// True when the code point is a space, line feed, tabulation or form feed.
function isWhitespace(cp: number) {
  switch (cp) {
    case CP.SPACE:
    case CP.LINE_FEED:
    case CP.TABULATION:
    case CP.FORM_FEED:
      return true;
    default:
      return false;
  }
}
17+
18+
// True when the code point lies in the inclusive range
// LATIN_CAPITAL_A..LATIN_CAPITAL_Z.
function isAsciiUpper(cp: number) {
  return CP.LATIN_CAPITAL_A <= cp && cp <= CP.LATIN_CAPITAL_Z;
}
21+
22+
// True when the code point lies in the inclusive range
// LATIN_SMALL_A..LATIN_SMALL_Z.
function isAsciiLower(cp: number) {
  return CP.LATIN_SMALL_A <= cp && cp <= CP.LATIN_SMALL_Z;
}
25+
26+
// True when the code point is an ASCII digit, an ASCII upper-case letter or
// an ASCII lower-case letter (checked in that order).
function isAsciiAlphaNumeric(cp: number) {
  if (isAsciiDigit(cp)) {
    return true;
  }
  if (isAsciiUpper(cp)) {
    return true;
  }
  return isAsciiLower(cp);
}
29+
30+
// True when the code point is a valid digit for a numeric character
// reference: always 0-9, and additionally A-F / a-f when `isHex` is set.
function isDigit(cp: number, isHex: boolean) {
  if (isAsciiDigit(cp)) {
    return true;
  }
  if (!isHex) {
    return false;
  }
  var isUpperHexLetter = cp >= CP.LATIN_CAPITAL_A && cp <= CP.LATIN_CAPITAL_F;
  var isLowerHexLetter = cp >= CP.LATIN_SMALL_A && cp <= CP.LATIN_SMALL_F;
  return isUpperHexLetter || isLowerHexLetter;
}
34+
35+
// Replace the serializer's escaping routine with an identity function so
// that text and attribute values are written out exactly as they were parsed.
Serializer.escapeString = (str: string) => str;
38+
39+
// Monkey patching this method is intended to decrease the bundle size
// of the runtime parser by allowing us to drop the "named_entity_trie".
//
// The replacement never resolves named character references:
//  - if the reference cannot start here (whitespace, '>', '&', the additional
//    allowed code point, or EOF), the code point is unconsumed and null is
//    returned;
//  - numeric references ('#', optionally followed by 'x'/'X', then at least
//    one digit) are delegated to `_consumeNumericEntity` and returned as a
//    one-element array;
//  - anything else is handed back to the tokenizer by reconsuming in the
//    state looked up in STATES_MAP for the current state.
// `inAttr` is accepted for signature compatibility but is not used here.
Tokenizer.prototype._consumeCharacterReference = function (startCp: number, inAttr: boolean) {
  var cannotStartReference =
    isWhitespace(startCp) ||
    startCp === CP.GREATER_THAN_SIGN ||
    startCp === CP.AMPERSAND ||
    startCp === this.additionalAllowedCp ||
    startCp === CP.EOF;

  if (cannotStartReference) {
    this._unconsume();
    return null;
  }

  if (startCp !== CP.NUMBER_SIGN) {
    // Would have been a named reference: leave it untouched.
    return this._reconsumeInState(STATES_MAP[this.state]);
  }

  var isHex = false;
  var radixMarker = this._lookahead();

  if (radixMarker === CP.LATIN_SMALL_X || radixMarker === CP.LATIN_CAPITAL_X) {
    this._consume();
    isHex = true;
  }

  var firstDigit = this._lookahead();
  if (firstDigit !== CP.EOF && isDigit(firstDigit, isHex)) {
    return [this._consumeNumericEntity(isHex)];
  }

  // No digits followed: give back '#' (and the 'x'/'X' marker when present).
  this._unconsumeSeveral(isHex ? 2 : 1);
  return null;
};

app-shell/src/app/shell-parser/template-parser/parse5/parse5-template-parser.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import {ASTNode} from '../../ast';
22
import {TemplateParser} from '../template-parser';
33

4-
import './tokenizer-patch';
4+
import './tokenizer-case-sensitivity-patch';
5+
import './drop-named-entities-patch';
56

67
var Parser = require('../../../../vendor/parse5/lib/parser');
78
var Serializer = require('../../../../vendor/parse5/lib/serializer');

app-shell/src/app/shell-parser/template-parser/parse5/tokenizer-patch.spec.ts renamed to app-shell/src/app/shell-parser/template-parser/parse5/tokenizer-case-sensitivity-patch.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import './tokenizer-patch';
1+
import './tokenizer-case-sensitivity-patch';
22

33
import {
44
beforeEach,

0 commit comments

Comments
 (0)