Skip to content

Commit 235822d

Browse files
committed
Python: Improve handling of syntax errors
Rather than relying on matching arbitrary nodes inside tree-sitter-graph and then checking whether they are of type ERROR or MISSING (which seems to have stopped working in later versions of tree-sitter), we now explicitly go through the tree-sitter tree, locating all of the error and missing nodes along the way. We then add these on to the graph output in the same format as was previously produced by tree-sitter-graph. Note that it's very likely that some of the syntax errors will move around a bit as a consequence of this change. In general, we don't expect syntax errors to have stable locations, as small changes in the grammar can cause an error to appear in a different position, even if the underlying (erroneous) code has not changed.
1 parent b108d47 commit 235822d

File tree

2 files changed

+104
-13
lines changed

2 files changed

+104
-13
lines changed

python/extractor/tsg-python/python.tsg

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,6 @@
66
(module) @mod
77
{ let @mod.node = (ast-node @mod "Module") }
88

9-
(_) @anynode
10-
{
11-
scan (node-type @anynode) {
12-
"^(ERROR|MISSING)$" {
13-
let @anynode.node = (ast-node @anynode "SyntaxErrorNode")
14-
attr (@anynode.node) source = (source-text @anynode)
15-
}
16-
}
17-
}
18-
199
(parenthesized_expression) @nd
2010
{ let @nd.node = (ast-node @nd "Expr") }
2111

python/extractor/tsg-python/src/main.rs

Lines changed: 104 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,99 @@ pub mod extra_functions {
480480
}
481481
}
482482

483+
struct TreeIterator<'a> {
484+
nodes_to_visit: Vec<tree_sitter::Node<'a>>,
485+
}
486+
487+
impl<'a> TreeIterator<'a> {
488+
fn new(root: tree_sitter::Node<'a>) -> Self {
489+
Self {
490+
nodes_to_visit: vec![root],
491+
}
492+
}
493+
}
494+
495+
impl<'a> Iterator for TreeIterator<'a> {
496+
type Item = tree_sitter::Node<'a>;
497+
498+
fn next(&mut self) -> Option<Self::Item> {
499+
if let Some(node) = self.nodes_to_visit.pop() {
500+
// Add all children to the queue for processing
501+
self.nodes_to_visit
502+
.extend((0..node.child_count()).rev().filter_map(|i| node.child(i)));
503+
Some(node)
504+
} else {
505+
None
506+
}
507+
}
508+
}
509+
510+
#[derive(Debug, Clone)]
511+
struct SyntaxError {
512+
start_pos: tree_sitter::Point,
513+
end_pos: tree_sitter::Point,
514+
source: String,
515+
}
516+
517+
fn syntax_errors_from_tree<'a>(
518+
root: tree_sitter::Node<'a>,
519+
source: &'a str,
520+
) -> impl Iterator<Item = SyntaxError> + 'a {
521+
TreeIterator::new(root)
522+
.filter(|&node| node.is_error() || node.is_missing())
523+
.map(move |node| {
524+
let start_pos = node.start_position();
525+
let end_pos = node.end_position();
526+
let text = &source[node.byte_range()];
527+
SyntaxError {
528+
start_pos,
529+
end_pos,
530+
source: text.to_string(),
531+
}
532+
})
533+
}
534+
535+
fn add_syntax_error_nodes(graph: &mut tree_sitter_graph::graph::Graph, errors: &[SyntaxError]) {
536+
for error in errors {
537+
let error_node = graph.add_graph_node();
538+
539+
// Add _kind attribute
540+
graph[error_node]
541+
.attributes
542+
.add(
543+
tree_sitter_graph::Identifier::from("_kind"),
544+
tree_sitter_graph::graph::Value::String("SyntaxErrorNode".to_string()),
545+
)
546+
.expect("Fresh node should not have duplicate attributes");
547+
548+
// Add _location attribute
549+
let location = tree_sitter_graph::graph::Value::List(
550+
vec![
551+
error.start_pos.row,
552+
error.start_pos.column,
553+
error.end_pos.row,
554+
error.end_pos.column,
555+
]
556+
.into_iter()
557+
.map(|v| tree_sitter_graph::graph::Value::from(v as u32))
558+
.collect(),
559+
);
560+
graph[error_node]
561+
.attributes
562+
.add(tree_sitter_graph::Identifier::from("_location"), location)
563+
.expect("Fresh node should not have duplicate attributes");
564+
565+
// Add source attribute
566+
graph[error_node]
567+
.attributes
568+
.add(
569+
tree_sitter_graph::Identifier::from("source"),
570+
tree_sitter_graph::graph::Value::String(error.source.clone()),
571+
)
572+
.expect("Fresh node should not have duplicate attributes");
573+
}
574+
}
575+
483576
fn main() -> Result<()> {
484577
let matches = Command::new("tsg-python")
485578
.version(BUILD_VERSION)
@@ -581,10 +674,18 @@ fn main() -> Result<()> {
581674
);
582675

583676
let globals = Variables::new();
584-
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
585-
let graph = file
586-
.execute(&tree, &source, &mut config, &NoCancellation)
677+
let config = ExecutionConfig::new(&functions, &globals).lazy(false);
678+
let mut graph = file
679+
.execute(&tree, &source, &config, &NoCancellation)
587680
.with_context(|| format!("Could not execute TSG file {}", tsg_path))?;
681+
682+
// Collect and add syntax error nodes to the graph
683+
if tree.root_node().has_error() {
684+
let syntax_errors: Vec<SyntaxError> =
685+
syntax_errors_from_tree(tree.root_node(), &source).collect();
686+
add_syntax_error_nodes(&mut graph, &syntax_errors);
687+
}
688+
588689
print!("{}", graph.pretty_print());
589690
Ok(())
590691
}

0 commit comments

Comments
 (0)