Skip to content

Commit 74e9ed5

Browse files
committed
Improve performance of hidden node parsing (#1790)
1 parent f97a70a commit 74e9ed5

File tree

6 files changed

+126
-48
lines changed

6 files changed

+126
-48
lines changed

packages/langium/src/parser/cst-node-builder.ts

Lines changed: 38 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,14 @@ import type { Range } from 'vscode-languageserver-types';
99
import type { AbstractElement } from '../languages/generated/ast.js';
1010
import type { AstNode, CompositeCstNode, CstNode, LeafCstNode, RootCstNode } from '../syntax-tree.js';
1111
import { Position } from 'vscode-languageserver-types';
12-
import { isCompositeCstNode } from '../syntax-tree.js';
1312
import { tokenToRange } from '../utils/cst-utils.js';
1413

1514
export class CstNodeBuilder {
1615

1716
private rootNode!: RootCstNodeImpl;
1817
private nodeStack: CompositeCstNodeImpl[] = [];
1918

20-
private get current(): CompositeCstNodeImpl {
19+
get current(): CompositeCstNodeImpl {
2120
return this.nodeStack[this.nodeStack.length - 1] ?? this.rootNode;
2221
}
2322

@@ -37,8 +36,8 @@ export class CstNodeBuilder {
3736
return compositeNode;
3837
}
3938

40-
buildLeafNode(token: IToken, feature: AbstractElement): LeafCstNode {
41-
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, false);
39+
buildLeafNode(token: IToken, feature?: AbstractElement): LeafCstNode {
40+
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, !feature);
4241
leafNode.grammarSource = feature;
4342
leafNode.root = this.rootNode;
4443
this.current.content.push(leafNode);
@@ -55,6 +54,39 @@ export class CstNodeBuilder {
5554
}
5655
}
5756

57+
addHiddenNodes(tokens: IToken[]): void {
58+
const nodes: LeafCstNode[] = [];
59+
for (const token of tokens) {
60+
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true);
61+
leafNode.root = this.rootNode;
62+
nodes.push(leafNode);
63+
}
64+
let current: CompositeCstNode = this.current;
65+
let added = false;
66+
// If the current composite node already has content, we append the hidden nodes to it
67+
if (current.content.length > 0) {
68+
current.content.push(...nodes);
69+
return;
70+
}
71+
// Otherwise we are at a newly created node
72+
// Instead of adding the hidden nodes here, we search for the first parent node with content
73+
while (current.container) {
74+
const index = current.container.content.indexOf(current);
75+
if (index > 0) {
76+
// Add the hidden nodes before the current node
77+
current.container.content.splice(index, 0, ...nodes);
78+
added = true;
79+
break;
80+
}
81+
current = current.container;
82+
}
83+
// If we arrive at the root node, we add the hidden nodes at the beginning
84+
// This is the case if the hidden nodes are the first nodes in the tree
85+
if (!added) {
86+
this.rootNode.content.unshift(...nodes);
87+
}
88+
}
89+
5890
construct(item: { $type: string | symbol | undefined, $cstNode: CstNode }): void {
5991
const current: CstNode = this.current;
6092
// The specified item could be a datatype ($type is symbol) or a fragment ($type is undefined)
@@ -70,34 +102,6 @@ export class CstNodeBuilder {
70102
this.removeNode(node);
71103
}
72104
}
73-
74-
addHiddenTokens(hiddenTokens: IToken[]): void {
75-
for (const token of hiddenTokens) {
76-
const hiddenNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true);
77-
hiddenNode.root = this.rootNode;
78-
this.addHiddenToken(this.rootNode, hiddenNode);
79-
}
80-
}
81-
82-
private addHiddenToken(node: CompositeCstNode, token: LeafCstNode): void {
83-
const { offset: tokenStart, end: tokenEnd } = token;
84-
85-
for (let i = 0; i < node.content.length; i++) {
86-
const child = node.content[i];
87-
const { offset: childStart, end: childEnd } = child;
88-
if (isCompositeCstNode(child) && tokenStart > childStart && tokenEnd < childEnd) {
89-
this.addHiddenToken(child, token);
90-
return;
91-
} else if (tokenEnd <= childStart) {
92-
node.content.splice(i, 0, token);
93-
return;
94-
}
95-
}
96-
97-
// We know that we haven't found a suited position for the token
98-
// So we simply add it to the end of the current node
99-
node.content.push(token);
100-
}
101105
}
102106

103107
export abstract class AbstractCstNode implements CstNode {
@@ -107,7 +111,7 @@ export abstract class AbstractCstNode implements CstNode {
107111
abstract get range(): Range;
108112

109113
container?: CompositeCstNode;
110-
grammarSource: AbstractElement;
114+
grammarSource?: AbstractElement;
111115
root: RootCstNode;
112116
private _astNode?: AstNode;
113117

@@ -117,7 +121,7 @@ export abstract class AbstractCstNode implements CstNode {
117121
}
118122

119123
/** @deprecated use `grammarSource` instead. */
120-
get feature(): AbstractElement {
124+
get feature(): AbstractElement | undefined {
121125
return this.grammarSource;
122126
}
123127

packages/langium/src/parser/langium-parser.ts

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import type { AbstractElement, Action, Assignment, ParserRule } from '../languag
1010
import type { Linker } from '../references/linker.js';
1111
import type { LangiumCoreServices } from '../services.js';
1212
import type { AstNode, AstReflection, CompositeCstNode, CstNode } from '../syntax-tree.js';
13-
import type { Lexer } from './lexer.js';
13+
import type { Lexer, LexerResult } from './lexer.js';
1414
import type { IParserConfig } from './parser-config.js';
1515
import type { ValueConverter } from './value-converter.js';
1616
import { defaultParserErrorProvider, EmbeddedActionsParser, LLkLookaheadStrategy } from 'chevrotain';
@@ -195,6 +195,7 @@ export class LangiumParser extends AbstractLangiumParser {
195195
private readonly converter: ValueConverter;
196196
private readonly astReflection: AstReflection;
197197
private readonly nodeBuilder = new CstNodeBuilder();
198+
private lexerResult?: LexerResult;
198199
private stack: any[] = [];
199200
private assignmentMap = new Map<AbstractElement, AssignmentElement | undefined>();
200201

@@ -232,15 +233,16 @@ export class LangiumParser extends AbstractLangiumParser {
232233

233234
parse<T extends AstNode = AstNode>(input: string, options: ParserOptions = {}): ParseResult<T> {
234235
this.nodeBuilder.buildRootNode(input);
235-
const lexerResult = this.lexer.tokenize(input);
236+
const lexerResult = this.lexerResult = this.lexer.tokenize(input);
236237
this.wrapper.input = lexerResult.tokens;
237238
const ruleMethod = options.rule ? this.allRules.get(options.rule) : this.mainRule;
238239
if (!ruleMethod) {
239240
throw new Error(options.rule ? `No rule found with name '${options.rule}'` : 'No main rule available.');
240241
}
241242
const result = ruleMethod.call(this.wrapper, {});
242-
this.nodeBuilder.addHiddenTokens(lexerResult.hidden);
243+
this.nodeBuilder.addHiddenNodes(lexerResult.hidden);
243244
this.unorderedGroups.clear();
245+
this.lexerResult = undefined;
244246
return {
245247
value: result,
246248
lexerErrors: lexerResult.errors,
@@ -273,9 +275,26 @@ export class LangiumParser extends AbstractLangiumParser {
273275
};
274276
}
275277

278+
private extractHiddenTokens(token: IToken): IToken[] {
279+
const hiddenTokens = this.lexerResult!.hidden;
280+
if (!hiddenTokens.length) {
281+
return [];
282+
}
283+
const offset = token.startOffset;
284+
for (let i = 0; i < hiddenTokens.length; i++) {
285+
const token = hiddenTokens[i];
286+
if (token.startOffset > offset) {
287+
return hiddenTokens.splice(0, i);
288+
}
289+
}
290+
return hiddenTokens.splice(0, hiddenTokens.length);
291+
}
292+
276293
consume(idx: number, tokenType: TokenType, feature: AbstractElement): void {
277294
const token = this.wrapper.wrapConsume(idx, tokenType);
278295
if (!this.isRecording() && this.isValidToken(token)) {
296+
const hiddenTokens = this.extractHiddenTokens(token);
297+
this.nodeBuilder.addHiddenNodes(hiddenTokens);
279298
const leafNode = this.nodeBuilder.buildLeafNode(token, feature);
280299
const { assignment, isCrossRef } = this.getAssignment(feature);
281300
const current = this.current;

packages/langium/src/serializer/hydrator.ts

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -296,23 +296,22 @@ export class DefaultHydrator implements Hydrator {
296296
return this.lexer.definition[name];
297297
}
298298

299-
protected getGrammarElementId(node: AbstractElement): number | undefined {
299+
protected getGrammarElementId(node: AbstractElement | undefined): number | undefined {
300+
if (!node) {
301+
return undefined;
302+
}
300303
if (this.grammarElementIdMap.size === 0) {
301304
this.createGrammarElementIdMap();
302305
}
303306
return this.grammarElementIdMap.get(node);
304307
}
305308

306-
protected getGrammarElement(id: number): AbstractElement {
309+
protected getGrammarElement(id: number): AbstractElement | undefined {
307310
if (this.grammarElementIdMap.size === 0) {
308311
this.createGrammarElementIdMap();
309312
}
310313
const element = this.grammarElementIdMap.getKey(id);
311-
if (element) {
312-
return element;
313-
} else {
314-
throw new Error('Invalid grammar element id: ' + id);
315-
}
314+
return element;
316315
}
317316

318317
protected createGrammarElementIdMap(): void {

packages/langium/src/syntax-tree.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -234,9 +234,9 @@ export interface CstNode extends DocumentSegment {
234234
/** The root CST node */
235235
readonly root: RootCstNode;
236236
/** The grammar element from which this node was parsed */
237-
readonly grammarSource: AbstractElement;
237+
readonly grammarSource?: AbstractElement;
238238
/** @deprecated use `grammarSource` instead. */
239-
readonly feature: AbstractElement;
239+
readonly feature?: AbstractElement;
240240
/** The AST node created from this CST node */
241241
readonly astNode: AstNode;
242242
/** @deprecated use `astNode` instead. */

packages/langium/test/parser/langium-parser.test.ts

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,9 @@
44
* terms of the MIT License, which is available in the project root.
55
******************************************************************************/
66

7-
import type { AstNode, LangiumCoreServices } from 'langium';
7+
import { EmptyFileSystem, type AstNode, type LangiumCoreServices } from 'langium';
88
import { describe, expect, test, beforeEach } from 'vitest';
9-
import { createServicesForGrammar } from 'langium/grammar';
9+
import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar';
1010
import { parseHelper } from 'langium/test';
1111

1212
describe('Partial parsing', () => {
@@ -66,6 +66,29 @@ describe('Partial parsing', () => {
6666

6767
});
6868

69+
describe('hidden node parsing', () => {
70+
71+
test('finishes in expected time', async () => {
72+
const parser = createLangiumGrammarServices(EmptyFileSystem).grammar.parser.LangiumParser;
73+
let content = 'Rule:';
74+
// Adding hidden nodes used to cause super-linear (roughly quadratic) parsing time behavior
75+
for (let i = 0; i < 2500; i++) {
76+
content += "'a' /* A */ /* B */ /* C */\n";
77+
}
78+
content += ';';
79+
const start = Date.now();
80+
// This roughly takes 100-300 ms on a modern machine
81+
// If it takes longer, the hidden node parsing has likely regressed to super-linear (quadratic) behavior
82+
// On an older version of the parser, this took ~5 seconds
83+
const result = parser.parse(content);
84+
expect(result.lexerErrors).toHaveLength(0);
85+
expect(result.parserErrors).toHaveLength(0);
86+
const end = Date.now();
87+
expect(end - start).toBeLessThan(1000);
88+
});
89+
90+
});
91+
6992
interface A extends AstNode {
7093
name: string
7194
}

packages/langium/test/utils/cst-utils.test.ts

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,39 @@ describe('findLeafNode', () => {
6262
}
6363
});
6464

65+
describe('findCommentNode', () => {
66+
test('Finds correct comment with multiple comments before and after', async () => {
67+
const text = expandToString`
68+
Main: value=AB;
69+
terminal AB:
70+
/** A */
71+
/** B */
72+
'A'
73+
/** C */
74+
/** D */;
75+
`;
76+
const grammar = await parser(text);
77+
const offset = text.indexOf("'A'") + 1;
78+
const leafNode = findLeafNodeAtOffset(grammar.parseResult.value.$cstNode!, offset);
79+
const keyword = leafNode?.astNode;
80+
expect(keyword).toBeDefined();
81+
const comment = CstUtils.findCommentNode(keyword?.$cstNode, ['ML_COMMENT']);
82+
expect(comment?.text).toBe('/** B */');
83+
});
84+
85+
test('Finds correct comment at the start of the file', async () => {
86+
const text = expandToString`
87+
/** A */
88+
/** B */
89+
/** C */
90+
grammar test
91+
`;
92+
const grammar = await parser(text);
93+
const comment = CstUtils.findCommentNode(grammar.parseResult.value.$cstNode, ['ML_COMMENT']);
94+
expect(comment?.text).toBe('/** C */');
95+
});
96+
});
97+
6598
describe('compareRange', () => {
6699
test.each([
67100
// Different lines

0 commit comments

Comments
 (0)