Skip to content

Commit 00700b2

Browse files
author
Guillaume Chau
committed
HTML parser rewriting
1 parent 6c5e5ec commit 00700b2

File tree

2 files changed

+97
-156
lines changed

2 files changed

+97
-156
lines changed

packages/vue-component/package.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Package.registerBuildPlugin({
3838
'autoprefixer': '6.7.5',
3939
'vue-template-compiler': '2.2.6',
4040
'vue-template-es2015-compiler': '1.5.1',
41+
'parse5': '3.0.2',
4142
}
4243
});
4344

Lines changed: 96 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -1,172 +1,112 @@
1+
import parse5 from 'parse5'
2+
import { Meteor } from 'meteor/meteor'
3+
14
scanHtmlForTags = function scanHtmlForTags(options) {
2-
const scan = new HtmlScan(options);
3-
return scan.getTags();
5+
try {
6+
return parseHtml(options)
7+
} catch (e) {
8+
throwCompileError(e)
9+
}
410
};
511

6-
/**
7-
* Scan an HTML file for top-level tags and extract their contents. Pass them to
8-
* a tag handler (an object with a handleTag method)
9-
*
10-
* This is a primitive, regex-based scanner. It scans
11-
* top-level tags, which are allowed to have attributes,
12-
* and ignores top-level HTML comments.
13-
*/
14-
class HtmlScan {
15-
/**
16-
* Initialize and run a scan of a single file
17-
* @param {String} sourceName The filename, used in errors only
18-
* @param {String} contents The contents of the file
19-
* @param {String[]} tagNames An array of tag names that are accepted at the
20-
* top level. If any other tag is encountered, an error is thrown.
21-
*/
22-
constructor({
23-
sourceName,
24-
contents,
25-
tagNames
26-
}) {
27-
this.sourceName = sourceName;
28-
this.contents = contents;
29-
this.tagNames = tagNames;
30-
31-
this.rest = contents;
32-
this.index = 0;
33-
34-
this.tags = [];
35-
36-
tagNameRegex = this.tagNames.join("|");
37-
const openTagRegex = new RegExp(`^((<(${tagNameRegex})\\b)|(<!--)|(<!DOCTYPE|{{!)|$)`, "i");
38-
39-
while (this.rest) {
40-
// skip whitespace first (for better line numbers)
41-
this.advance(this.rest.match(/^\s*/)[0].length);
42-
43-
const match = openTagRegex.exec(this.rest);
44-
45-
if (! match) {
46-
this.throwCompileError(`Expected one of: <${this.tagNames.join('>, <')}>`);
47-
}
48-
49-
const matchToken = match[1];
50-
const matchTokenTagName = match[3];
51-
const matchTokenComment = match[4];
52-
const matchTokenUnsupported = match[5];
53-
54-
const tagStartIndex = this.index;
55-
this.advance(match.index + match[0].length);
56-
57-
if (! matchToken) {
58-
break; // matched $ (end of file)
59-
}
12+
const parseHtml = Meteor.wrapAsync(({
13+
sourceName,
14+
contents,
15+
tagNames
16+
}, cb) => {
17+
18+
const tags = []
19+
20+
const parser = new parse5.SAXParser({
21+
locationInfo: true,
22+
})
23+
24+
let depth = 0
25+
let info
26+
27+
function addTag() {
28+
const tagContents = contents.substring(info.start.index, info.end.index)
29+
30+
const tag = {
31+
tagName: info.tag.name,
32+
attribs: info.tag.attrs,
33+
contents: tagContents,
34+
contentsStartIndex: info.start.index,
35+
tagStartIndex: info.tag.index,
36+
fileContents: contents,
37+
sourceName: sourceName,
38+
startLine: info.start.line,
39+
endLine: info.end.line
40+
}
6041

61-
if (matchTokenComment === '<!--') {
62-
// top-level HTML comment
63-
const commentEnd = /--\s*>/.exec(this.rest);
64-
if (! commentEnd)
65-
this.throwCompileError("unclosed HTML comment in template file");
66-
this.advance(commentEnd.index + commentEnd[0].length);
67-
continue;
68-
}
42+
// save the tag
43+
tags.push(tag)
44+
}
6945

70-
if (matchTokenUnsupported) {
71-
switch (matchTokenUnsupported.toLowerCase()) {
72-
case '<!doctype':
73-
this.throwCompileError(
74-
"Can't set DOCTYPE here. (Meteor sets <!DOCTYPE html> for you)");
75-
case '{{!':
76-
this.throwCompileError(
77-
"Can't use '{{! }}' outside a template. Use '<!-- -->'.");
46+
parser.on('startTag', (name, attrs, selfClosing, location) => {
47+
if (depth === 0) {
48+
if (tagNames.indexOf(name) !== -1) {
49+
info = {
50+
tag: {
51+
name,
52+
attrs: attrs.reduce((dic, attr) => {
53+
const value = attr.value === '' ? true : attr.value
54+
dic[attr.name] = value
55+
return dic
56+
}, {}),
57+
index: location.startOffset,
58+
},
59+
start: {
60+
line: location.line,
61+
index: location.endOffset,
62+
},
7863
}
7964

80-
this.throwCompileError();
81-
}
65+
if (selfClosing) {
66+
info.end = {
67+
line: location.line,
68+
index: location.endOffset,
69+
}
8270

83-
// otherwise, a <tag>
84-
const tagName = matchTokenTagName.toLowerCase();
85-
const tagAttribs = {}; // bare name -> value dict
86-
const tagPartRegex = /^\s*((([a-zA-Z0-9:_-]+)\s*(=\s*(["'])(.*?)\5)?)|(>))/;
87-
88-
// read attributes
89-
let attr;
90-
while ((attr = tagPartRegex.exec(this.rest))) {
91-
const attrToken = attr[1];
92-
const attrKey = attr[3];
93-
let attrValue = attr[6];
94-
this.advance(attr.index + attr[0].length);
95-
96-
if (attrToken === '>') {
97-
break;
71+
addTag()
72+
} else {
73+
depth++
9874
}
99-
100-
// XXX we don't HTML unescape the attribute value
101-
// (e.g. to allow "abcd&quot;efg") or protect against
102-
// collisions with methods of tagAttribs (e.g. for
103-
// a property named toString)
104-
attrValue = attrValue && attrValue.match(/^\s*([\s\S]*?)\s*$/)[1]; // trim
105-
tagAttribs[attrKey] = attrValue;
106-
}
107-
108-
if (! attr) { // didn't end on '>'
109-
this.throwCompileError(`Parse error in tag ${tagName}`);
11075
}
111-
112-
// find </tag>
113-
const end = (new RegExp('</'+tagName+'\\s*>', 'i')).exec(this.rest);
114-
if (! end) {
115-
this.throwCompileError("unclosed <"+tagName+">");
116-
}
117-
118-
const tagContents = this.rest.slice(0, end.index);
119-
const contentsStartIndex = this.index;
120-
121-
// trim the tag contents.
122-
// this is a courtesy and is also relied on by some unit tests.
123-
var m = tagContents.match(/^([ \t\r\n]*)([\s\S]*?)[ \t\r\n]*$/);
124-
const trimmedContentsStartIndex = contentsStartIndex + m[1].length;
125-
const trimmedTagContents = m[2];
126-
127-
const tag = {
128-
tagName: tagName,
129-
attribs: tagAttribs,
130-
contents: trimmedTagContents,
131-
contentsStartIndex: trimmedContentsStartIndex,
132-
tagStartIndex: tagStartIndex,
133-
fileContents: this.contents,
134-
sourceName: this.sourceName
135-
};
136-
137-
// save the tag
138-
this.tags.push(tag);
139-
140-
// advance afterwards, so that line numbers in errors are correct
141-
this.advance(end.index + end[0].length);
76+
} else if (name === info.tag.name) {
77+
depth ++
14278
}
143-
}
79+
})
14480

145-
/**
146-
* Advance the parser
147-
* @param {Number} amount The amount of characters to advance
148-
*/
149-
advance(amount) {
150-
this.rest = this.rest.substring(amount);
151-
this.index += amount;
152-
}
153-
154-
throwCompileError(msg, overrideIndex) {
155-
const finalIndex = (typeof overrideIndex === 'number' ? overrideIndex : this.index);
81+
parser.on('endTag', (name, location) => {
82+
if (depth !== 0 && name === info.tag.name) {
83+
depth--
15684

157-
const err = new TemplatingTools.CompileError();
158-
err.message = msg || "bad formatting in template file";
159-
err.file = this.sourceName;
160-
err.line = this.contents.substring(0, finalIndex).split('\n').length;
85+
if (depth === 0) {
86+
info.end = {
87+
line: location.line,
88+
index: location.startOffset - 1,
89+
}
16190

162-
throw err;
163-
}
91+
addTag()
92+
}
93+
}
94+
})
95+
96+
parser.on('end', () => {
97+
if (depth !== 0) {
98+
cb({
99+
path: sourceName,
100+
line: info.start.line,
101+
tag: info.tag.name,
102+
message: `Missing closing </${info.tag.name}>`,
103+
}, null)
104+
return
105+
}
164106

165-
throwBodyAttrsError(msg) {
166-
this.parseError(msg);
167-
}
107+
cb(null, tags)
108+
})
168109

169-
getTags() {
170-
return this.tags;
171-
}
172-
}
110+
parser.write(contents)
111+
parser.end()
112+
})

0 commit comments

Comments
 (0)