perf(AppShell): drop named entities from parse5 (#74)

mgechev · jeffbcross · commit 6bd212a96b17 · 2016-07-07T11:19:36.000-07:00
diff --git a/app-shell/package.json b/app-shell/package.json
@@ -11,7 +11,7 @@
     "pree2e": "webdriver-manager update",
     "e2e": "protractor",
     "clean": "rm -rf dist",
-    "build": "ng build && tsc -p src/tsconfig.publish.es5.json && tsc -p src/tsconfig.publish.es6.json && cp src/package.json dist/app/package.json && browserify dist/app/shell-parser/index.js -s shellParserFactory > dist/app/shell-parser.js && rm -rf dist/app/shell-parser && rm -rf dist/app/vendor",
+    "build": "ng build && tsc -p src/tsconfig.publish.es5.json && tsc -p src/tsconfig.publish.es6.json && cp src/package.json dist/app/package.json && echo '' > dist/vendor/parse5/lib/tokenizer/named_entity_trie.js && browserify dist/app/shell-parser/index.js -s shellParserFactory > dist/app/shell-parser.js && rm -rf dist/app/shell-parser && rm -rf dist/app/vendor",
     "build_publish": "npm run clean && npm run build"
   },
   "private": true,
diff --git a/app-shell/src/app/shell-parser/template-parser/parse5/drop-named-entities-patch.spec.ts b/app-shell/src/app/shell-parser/template-parser/parse5/drop-named-entities-patch.spec.ts
@@ -0,0 +1,49 @@
+declare var require: any;
+import {
+  beforeEach,
+  it,
+  describe,
+  expect,
+  inject
+} from '@angular/core/testing';
+
+import { Parse5TemplateParser } from './parse5-template-parser';
+
+describe('dropped named entities patch', () => {
+
+  let parser: Parse5TemplateParser;
+  beforeEach(() => {
+    parser = new Parse5TemplateParser();
+  });
+
+  describe('parse', () => {
+
+    it('should not modify character references', () => {
+      const tree = parser.parse('<body>&nbsp;</div>');
+      const body = <any>(tree.childNodes[0].childNodes[1]);
+      expect(body.childNodes[0].value).toBe('&nbsp;');
+    });
+
+    it('should not modify character references in attribute values', () => {
+      const tree = parser.parse('<body style="&quote;"></body>');
+      const body = <any>(tree.childNodes[0].childNodes[1]);
+      expect(body.attrs[0].value).toBe('&quote;');
+    });
+
+  });
+
+  describe('serialize', () => {
+
+    it('should serialize named entities properly', () => {
+      const template = parser.serialize(parser.parse('<body>&nbsp;</div>'));
+      expect(template).toBe('<html><head></head><body>&nbsp;</body></html>');
+    });
+
+    it('should serialize named entities in attributes properly', () => {
+      const template = parser.serialize(parser.parse('<body style="&quote;">&nbsp;</div>'));
+      expect(template).toBe('<html><head></head><body style="&quote;">&nbsp;</body></html>');
+    });
+
+  });
+
+});
diff --git a/app-shell/src/app/shell-parser/template-parser/parse5/drop-named-entities-patch.ts b/app-shell/src/app/shell-parser/template-parser/parse5/drop-named-entities-patch.ts
@@ -0,0 +1,62 @@
+var Tokenizer = require('../../../../vendor/parse5/lib/tokenizer');
+var Serializer = require('../../../../vendor/parse5/lib/serializer');
+var CP = require('../../../../vendor/parse5/lib/common/unicode').CODE_POINTS;
+var STATES_MAP: {[key: string]: string} = {
+  CHARACTER_REFERENCE_IN_DATA_STATE: 'DATA_STATE',
+  CHARACTER_REFERENCE_IN_RCDATA_STATE: 'RCDATA_STATE',
+  CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE_STATE: 'DATA_STATE'
+};
+
+function isAsciiDigit(cp: number) {
+  return cp >= CP.DIGIT_0 && cp <= CP.DIGIT_9;
+}
+
+function isWhitespace(cp: number) {
+  return cp === CP.SPACE || cp === CP.LINE_FEED || cp === CP.TABULATION || cp === CP.FORM_FEED;
+}
+
+function isAsciiUpper(cp: number) {
+  return cp >= CP.LATIN_CAPITAL_A && cp <= CP.LATIN_CAPITAL_Z;
+}
+
+function isAsciiLower(cp: number) {
+  return cp >= CP.LATIN_SMALL_A && cp <= CP.LATIN_SMALL_Z;
+}
+
+function isAsciiAlphaNumeric(cp: number) {
+  return isAsciiDigit(cp) || isAsciiUpper(cp) || isAsciiLower(cp);
+}
+
+function isDigit(cp: number, isHex: boolean) {
+  return isAsciiDigit(cp) || isHex && (cp >= CP.LATIN_CAPITAL_A && cp <= CP.LATIN_CAPITAL_F ||
+				       cp >= CP.LATIN_SMALL_A && cp <= CP.LATIN_SMALL_F);
+}
+
+Serializer.escapeString = function (str: string) {
+  return str;
+};
+
+// Monkey-patching this method is intended to decrease the bundle size
+// of the runtime parser by allowing us to drop the "named_entity_trie".
+Tokenizer.prototype._consumeCharacterReference = function (startCp: number, inAttr: boolean) {
+  if (isWhitespace(startCp) || startCp === CP.GREATER_THAN_SIGN ||
+    startCp === CP.AMPERSAND || startCp === this.additionalAllowedCp || startCp === CP.EOF) {
+    this._unconsume();
+    return null;
+  }
+  if (startCp === CP.NUMBER_SIGN) {
+    var isHex = false;
+    var nextCp = this._lookahead();
+
+    if (nextCp === CP.LATIN_SMALL_X || nextCp === CP.LATIN_CAPITAL_X) {
+      this._consume();
+      isHex = true;
+    }
+    nextCp = this._lookahead();
+    if (nextCp !== CP.EOF && isDigit(nextCp, isHex))
+      return [this._consumeNumericEntity(isHex)];
+    this._unconsumeSeveral(isHex ? 2 : 1);
+    return null;
+  }
+  return this._reconsumeInState(STATES_MAP[this.state]);
+};
diff --git a/app-shell/src/app/shell-parser/template-parser/parse5/parse5-template-parser.ts b/app-shell/src/app/shell-parser/template-parser/parse5/parse5-template-parser.ts
@@ -1,7 +1,8 @@
 import {ASTNode} from '../../ast';
 import {TemplateParser} from '../template-parser';
 
-import './tokenizer-patch';
+import './tokenizer-case-sensitivity-patch';
+import './drop-named-entities-patch';
 
 var Parser = require('../../../../vendor/parse5/lib/parser');
 var Serializer = require('../../../../vendor/parse5/lib/serializer');
diff --git a/app-shell/src/app/shell-parser/template-parser/parse5/tokenizer-case-sensitivity-patch.spec.ts b/app-shell/src/app/shell-parser/template-parser/parse5/tokenizer-case-sensitivity-patch.spec.ts
@@ -1,4 +1,4 @@
-import './tokenizer-patch';
+import './tokenizer-case-sensitivity-patch';
 
 import {
   beforeEach,
diff --git a/app-shell/src/app/shell-parser/template-parser/parse5/tokenizer-case-sensitivity-patch.ts b/app-shell/src/app/shell-parser/template-parser/parse5/tokenizer-case-sensitivity-patch.ts

@@ -1,4 +1,4 @@
-import './tokenizer-patch';
+import './tokenizer-case-sensitivity-patch';
 
 import {
   beforeEach,