Skip to content

Commit 659f553

Browse files
authored
Implement the reference Fluent Syntax parser (#103)
This is the official reference implementation of the parser. The parser focuses on strictness and correctness at a cost of reduced performance. The ASDL description of the AST has been removed in favor of `syntax/ast.mjs` which defines the actual AST nodes returned by the reference parser. The EBNF is now auto-generated from the reference parser's `syntax/grammar.mjs` file. It provides an easy to read overview of the grammar and will continue to be updated in the future. Going forward, all changes to the grammar will be implemented in the reference parser first, which also ships with an extensive test suite. Notable changes: - Added junk entries. The grammar now explicitly defines the `junk_line` production which is converted into `Junk` during the AST construction. - Comments may now be attached to Messages or Terms. The grammar now encodes the behavior for Comments preceding Messages and Terms. If there are no blank lines between the Comment and the Message or the Term, the Comment becomes part of the `Message` or the `Term` AST node. - Fixed many issues with the white-space grammar. The EBNF for Syntax 0.5 had many issues with its use of `inline-space` and `break-indent`. It defined undesirable or impossible parsing behavior. These issues have been fixed and the new test suite will help ensure the correctness of the grammar in the future.
1 parent 69ecabc commit 659f553

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+13027
-3045
lines changed

.eslintrc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"parserOptions": {
3+
"ecmaVersion": 2018,
4+
"sourceType": "module"
5+
},
6+
"env": {
7+
"es6": true,
8+
"node": true
9+
},
10+
"rules": {
11+
"eqeqeq": ["error", "always"],
12+
"indent": ["error", 4, {
13+
"CallExpression": {"arguments": 1},
14+
"MemberExpression": "off",
15+
"SwitchCase": 1
16+
}],
17+
"quotes": ["error", "double"],
18+
"semi": ["error", "always"],
19+
"no-tabs": "error",
20+
"no-undef": "error"
21+
}
22+
}

.travis.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
sudo: false
2+
language: node_js
3+
script: npm run ci
4+
node_js: 9
5+
cache:
6+
directories: node_modules
7+
notifications:
8+
irc:
9+
channels:
10+
- "irc.mozilla.org#l20n"
11+
on_success: always
12+
on_failure: always
13+
use_notice: true
14+
template:
15+
- "#%{build_number} %{message} %{build_url}"
16+
- "Change by %{author}: %{compare_url}"

README.md

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,34 @@
33
Fluent is a localization system designed to unleash the expressive power of
44
the natural language.
55

6-
This repository contains specification, design and documentation for Fluent.
7-
6+
This repository contains the specification, the reference implementation of the
7+
parser and the documentation for Fluent.
88

99
## Fluent Syntax (FTL)
1010

1111
FTL is the syntax for describing translation resources in Project Fluent. FTL
12-
stands for *Fluent Translation List*.
12+
stands for *Fluent Translation List*. Read the [Fluent Syntax Guide][] to get
13+
started learning Fluent.
14+
15+
The `syntax/` directory contains the reference implementation of the syntax as
16+
an _LL(infinity)_ parser.
17+
18+
The `spec/` directory contains the formal EBNF grammar, autogenerated from the
19+
reference implementation.
20+
21+
## Development
22+
23+
While working on the reference parser, use the following commands to test and
24+
validate your work:
25+
26+
npm test # Test the parser against JSON AST fixtures.
27+
npm run lint # Lint the parser code.
1328

14-
Read the [Fluent Syntax Guide][]. The `spec/` directory contains the formal
15-
grammar of the Fluent Syntax.
29+
npm run build:ebnf # Generate the EBNF from syntax/grammar.mjs.
30+
npm run build:fixtures # Generate test fixtures (FTL → JSON AST).
31+
npm run build:guide # Build the HTML version of the Guide.
1632

33+
npm run bench # Run the performance benchmark on large FTL.
1734

1835
## Learn more
1936

bin/ebnf.mjs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import fs from "fs";
2+
import readline from "readline";
3+
import parse_args from "minimist";
4+
import ebnf from "../lib/ebnf.mjs";
5+
6+
// Parse the command line. The only recognized option is -h/--help.
const argv = parse_args(process.argv.slice(2), {
    boolean: ["help"],
    alias: {help: "h"},
});

if (argv.help) {
    exit_help(0);
}

// The first positional argument names the input; "-" selects stdin.
const file_path = argv._[0];

if (!file_path) {
    // No input was given: show the usage text and exit with an error code.
    exit_help(1);
} else if (file_path === "-") {
    from_stdin();
} else {
    from_file(file_path);
}
26+
27+
// Print the usage text to stdout and terminate the process with `exit_code`.
// The callers in this script pass 0 when --help was requested and 1 when no
// FILE argument was given.
function exit_help(exit_code) {
28+
console.log(`
29+
Usage: node --experimental-modules ebnf.mjs [OPTIONS] <FILE>
30+
31+
When FILE is "-", read text from stdin.
32+
33+
Examples:
34+
35+
node --experimental-modules ebnf.mjs path/to/grammar.mjs
36+
cat path/to/grammar.mjs | node --experimental-modules ebnf.mjs -
37+
38+
Options:
39+
40+
-h, --help Display help and quit.
41+
`);
42+
process.exit(exit_code);
43+
}
44+
45+
// Read all of stdin and print the EBNF generated from it.
//
// Input is collected line by line. Once stdin closes, the lines are joined
// with "\n", a final "\n" is appended, and the text is handed to print_ebnf().
function from_stdin() {
    // No `output` stream is passed on purpose. Per the Node.js readline
    // documentation, terminal mode is inferred from output.isTTY, and in
    // terminal mode readline writes the input it reads to that stream. With
    // process.stdout as the output, piped input could be echoed in front of
    // the generated EBNF. No `prompt` is configured either, because this
    // function never calls rl.prompt().
    const rl = readline.createInterface({
        input: process.stdin,
    });

    const lines = [];

    rl.on("line", line => lines.push(line));
    rl.on("close", () =>
        print_ebnf(lines.join("\n") + "\n"));
}
58+
59+
// Read `file_path` as UTF-8 text and print the EBNF generated from its
// contents. A read error is rethrown from inside the callback.
function from_file(file_path) {
    const on_read = (err, content) => {
        if (err) {
            throw err;
        }

        print_ebnf(content);
    };

    fs.readFile(file_path, "utf8", on_read);
}
68+
69+
// Convert the grammar source text to EBNF and write it to stdout.
function print_ebnf(source) {
    const output = ebnf(source);
    // No newline is appended: each EBNF rule already ends with \n.
    process.stdout.write(output);
}

bin/parse.mjs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import fs from "fs";
2+
import readline from "readline";
3+
import parse_args from "minimist";
4+
import {Resource} from "../syntax/grammar.mjs";
5+
6+
// Parse the command line. The only recognized option is -h/--help.
const argv = parse_args(process.argv.slice(2), {
    boolean: ["help"],
    alias: {help: "h"},
});

if (argv.help) {
    exit_help(0);
}

// The first positional argument names the input; "-" selects stdin.
const file_path = argv._[0];

if (!file_path) {
    // No input was given: show the usage text and exit with an error code.
    exit_help(1);
} else if (file_path === "-") {
    parse_stdin();
} else {
    parse_file(file_path);
}
26+
27+
// Print the usage text to stdout and terminate the process with `exit_code`.
// The callers in this script pass 0 when --help was requested and 1 when no
// FILE argument was given.
function exit_help(exit_code) {
28+
console.log(`
29+
Usage: node --experimental-modules parse.mjs [OPTIONS] <FILE>
30+
31+
When FILE is "-", read text from stdin.
32+
33+
Examples:
34+
35+
node --experimental-modules parse.mjs path/to/file.ftl
36+
cat path/to/file.ftl | node --experimental-modules parse.mjs -
37+
38+
Options:
39+
40+
-h, --help Display help and quit.
41+
`);
42+
process.exit(exit_code);
43+
}
44+
45+
// Read all of stdin and parse it as FTL.
//
// Input is collected line by line. Once stdin closes, the lines are joined
// with "\n", a final "\n" is appended, and the text is handed to parse().
function parse_stdin() {
    // No `output` stream is passed on purpose. Per the Node.js readline
    // documentation, terminal mode is inferred from output.isTTY, and in
    // terminal mode readline writes the input it reads to that stream. With
    // process.stdout as the output, piped input could be echoed in front of
    // the JSON result. No `prompt` is configured either, because this
    // function never calls rl.prompt().
    const rl = readline.createInterface({
        input: process.stdin,
    });

    const lines = [];

    rl.on("line", line => lines.push(line));
    rl.on("close", () =>
        parse(lines.join("\n") + "\n"));
}
58+
59+
// Read `file_path` as UTF-8 text and parse its contents as FTL.
// A read error is rethrown from inside the callback.
function parse_file(file_path) {
    const on_read = (err, content) => {
        if (err) {
            throw err;
        }

        parse(content);
    };

    fs.readFile(file_path, "utf8", on_read);
}
68+
69+
70+
// Run the Resource parser on the FTL source text and report the result.
//
// On success the AST is printed to stdout as JSON indented by 4 spaces.
// On failure the error is printed to stderr and the process exit code is set
// to 1, so that scripts and shell pipelines can detect the failure.
function parse(ftl) {
    Resource.run(ftl).fold(
        ast => console.log(JSON.stringify(ast, null, 4)),
        err => {
            console.error(err);
            // Set exitCode instead of calling process.exit() so that pending
            // output is still written before the process ends.
            process.exitCode = 1;
        });
}

lib/combinators.mjs

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import Parser from "./parser.mjs";
2+
import {Success, Failure} from "./result.mjs";
3+
import {join} from "./mappers.mjs";
4+
5+
// Build a parser which calls fn() to obtain the actual parser each time it
// runs. Parsers may be defined as defer(() => parser) to avoid cyclic
// dependencies.
export function defer(fn) {
    return new Parser(stream => {
        const parser = fn();
        return parser.run(stream);
    });
}
11+
12+
// Succeed with `c` and advance the stream by one when the head of the stream
// is strictly equal to `c`; fail without advancing otherwise.
export function char(c) {
    return new Parser(stream => {
        if (stream.head() !== c) {
            return new Failure(`${c} not found`, stream);
        }

        return new Success(c, stream.move(1));
    });
}
18+
19+
// Run `re` against the stream via stream.exec(). On a match, succeed with the
// matched text and advance the stream by its length; otherwise fail.
export function regex(re) {
    return new Parser(stream => {
        const found = stream.exec(re);

        return found === null
            ? new Failure("regex did not match", stream)
            : new Success(found[0], stream.move(found[0].length));
    });
}
32+
33+
// Build a regex parser for the character class [range]. `range` is inserted
// verbatim between the square brackets.
export function charset(range) {
    const pattern = new RegExp(`[${range}]`);
    return regex(pattern);
}
36+
37+
// Match the literal `str` as a sequence of char() parsers, one per element of
// str.split(""), and combine their results with the `join` mapper.
export function string(str) {
    const chars = str.split("").map(c => char(c));
    return sequence(...chars).map(join);
}
40+
41+
// Succeed when the head of the stream is the Symbol.for("eof") marker,
// yielding that marker and advancing the stream by one; fail otherwise.
export function eof() {
    return new Parser(stream => {
        if (stream.head() !== Symbol.for("eof")) {
            return new Failure("not at EOF", stream);
        }

        return new Success(stream.head(), stream.move(1));
    });
}
47+
48+
// Run `parser` without consuming input: the outcome (success or failure) and
// its value are kept, but the result always carries the original stream.
export function lookahead(parser) {
    return new Parser(stream => {
        const result = parser.run(stream);

        return result.fold(
            matched => new Success(matched, stream),
            reason => new Failure(reason, stream));
    });
}
56+
57+
// Negative lookahead: succeed with null when `parser` fails, and fail when it
// succeeds. The stream is never advanced.
export function not(parser) {
    return new Parser(stream => {
        const on_match = () => new Failure("not failed", stream);
        const on_mismatch = () => new Success(null, stream);

        return parser.run(stream).fold(on_match, on_mismatch);
    });
}
65+
66+
// Require every parser except the last one to match as a lookahead, then run
// the last parser and yield only its result.
export function and(...parsers) {
    const final = parsers[parsers.length - 1];
    const guards = parsers.slice(0, -1).map(guard => lookahead(guard));

    return sequence(...guards, final)
        .map(results => results[results.length - 1]);
}
71+
72+
// Try the parsers in order against the same stream and return the first
// result that is a Success. Fail when none of them succeeds.
export function either(...parsers) {
    return new Parser(stream => {
        for (let i = 0; i < parsers.length; i++) {
            const attempt = parsers[i].run(stream);
            if (attempt instanceof Success) {
                return attempt;
            }
        }

        return new Failure("either failed", stream);
    });
}
83+
84+
// A parser which succeeds with `value` and leaves the stream untouched.
export function always(value) {
    const succeed = stream => new Success(value, stream);
    return new Parser(succeed);
}
87+
88+
// A parser which fails with `value` and leaves the stream untouched.
export function never(value) {
    const fail = stream => new Failure(value, stream);
    return new Parser(fail);
}
91+
92+
// Make `parser` optional: pass its success through unchanged, and turn its
// failure into a success with `undefined` at the original stream position.
export function maybe(parser) {
    return new Parser(stream => {
        const result = parser.run(stream);

        return result.fold(
            (matched, rest) => new Success(matched, rest),
            () => new Success(undefined, stream));
    });
}
100+
101+
// Run `p1`, which yields an array, then `p2`, and yield a new array with the
// value of `p2` added at the end.
export function append(p1, p2) {
    return p1.chain(values => {
        const with_value = value => values.concat([value]);
        return p2.map(with_value);
    });
}
105+
106+
// Run `prefix` and then `parser`; discard the prefix's value and yield only
// the value of `parser`.
export function after(prefix, parser) {
    const pair = sequence(prefix, parser);
    return pair.map(results => results[1]);
}
110+
111+
// Chain the parsers left to right, starting from always([]) and appending
// each parser in turn, so that the result is an array with one value per
// parser.
export function sequence(...parsers) {
    let combined = always([]);

    for (const parser of parsers) {
        combined = append(combined, parser);
    }

    return combined;
}
116+
117+
// Apply `parser` zero or more times and succeed with the array of matched
// values. The result carries the stream position after the last successful
// match, or the original stream when nothing matched.
//
// This is implemented as a loop rather than by recursion so that a long run
// of matches cannot exhaust the call stack.
//
// NOTE(review): `parser` must consume input whenever it succeeds; otherwise
// the repetition never terminates.
export function repeat(parser) {
    return new Parser(stream => {
        const values = [];
        let rest = stream;
        let matched = true;

        while (matched) {
            // fold() returns whatever the selected callback returns, which is
            // used here to signal whether another iteration is needed.
            matched = parser.run(rest).fold(
                (value, tail) => {
                    values.push(value);
                    rest = tail;
                    return true;
                },
                () => false);
        }

        return new Success(values, rest);
    });
}
128+
129+
// Apply `parser` one or more times. The first match is required; the rest are
// collected with repeat() and the first value is put in front of them. Fail
// at the original stream position when the first attempt does not match.
export function repeat1(parser) {
    return new Parser(stream => {
        const first = parser.run(stream);

        return first.fold(
            (head, tail) => {
                const prepend = rest => [head].concat(rest);
                return repeat(parser).map(prepend).run(tail);
            },
            () => new Failure("repeat1 failed", stream));
    });
}

lib/ebnf.mjs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import babylon from "babylon";
2+
import walk from "./walker.mjs";
3+
import visitor from "./visitor.mjs";
4+
import serialize from "./serializer.mjs";
5+
6+
// Produce the EBNF text for a grammar given as JavaScript module source.
//
// The source is parsed with babylon, the resulting AST is walked with the
// visitor to collect rules, and every rule is serialized and concatenated.
// The length of the longest rule name is passed to the serializer as
// state.max_name_length.
export default
function ebnf(source) {
    const grammar_ast = babylon.parse(source, {sourceType: "module"});
    const rules = walk(grammar_ast, visitor);
    const name_lengths = rules.map(rule => rule.name.length);
    const state = {
        max_name_length: Math.max(...name_lengths),
    };
    const serialized = rules.map(rule => serialize(rule, state));

    return serialized.join("");
}
18+

0 commit comments

Comments
 (0)