From a2976ae833edb090b95cdd934c753b7cc44ab536 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Sun, 2 Nov 2025 00:44:56 +0100 Subject: [PATCH 01/25] - add rst parser - add remove leading chars --- docs/source/components/configuration.rst | 2 +- docs/source/components/directive.rst | 22 +++++ pyproject.toml | 1 + src/sphinx_codelinks/analyse/analyse.py | 5 +- src/sphinx_codelinks/config.py | 5 ++ .../sphinx_extension/sn_rst.lark | 31 +++++++ .../sphinx_extension/sn_rst_parser.py | 88 +++++++++++++++++++ tests/data/dcdc/charge/demo_2.cpp | 4 +- tests/data/sphinx/src_trace.toml | 2 + tests/test_rst_parser.py | 58 ++++++++++++ 10 files changed, 214 insertions(+), 4 deletions(-) create mode 100644 src/sphinx_codelinks/sphinx_extension/sn_rst.lark create mode 100644 src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py create mode 100644 tests/test_rst_parser.py diff --git a/docs/source/components/configuration.rst b/docs/source/components/configuration.rst index d2fd522..e69908c 100644 --- a/docs/source/components/configuration.rst +++ b/docs/source/components/configuration.rst @@ -380,7 +380,7 @@ get_need_id_refs Enables the extraction of need IDs from source code comments. When enabled, **SourceAnalyse** will parse comments for specific markers that indicate need IDs, allowing them to be extracted for further usages. **Type:** ``bool`` -**Default:** ``False`` +**Default:** ``True`` .. code-block:: toml diff --git a/docs/source/components/directive.rst b/docs/source/components/directive.rst index f885bee..d73bd98 100644 --- a/docs/source/components/directive.rst +++ b/docs/source/components/directive.rst @@ -5,6 +5,28 @@ Directive .. attention:: ``src-trace`` directive currently only supports :ref:`one-line need definition `. +``src-trace`` Directive generates Sphinx-Needs items from source code comments. There are two ways to define need items in source code: + +1. **One-line need definition**: Define needs in a single line comment. + + Example in C++: + + .. 
code-block:: cpp + + // @ title, id_123, implementation, [link1, link2] + +2. **RST block need definition**: Define needs in a RST block comment. + + Example in C++: + + .. code-block:: cpp + + /* + .. implementation:: title + :id: id_123 + :links: link1, link2 + */ + ``CodeLinks`` provides ``src-trace`` directive and it can be used in the following ways: .. code-block:: rst diff --git a/pyproject.toml b/pyproject.toml index ae7b5f7..c999355 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "tree-sitter~=0.25.1", "tree-sitter-c-sharp>=0.23.1", "tree-sitter-yaml>=0.7.1", + "lark>=1.3.1", ] [build-system] diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index 79bc051..215589b 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -277,7 +277,10 @@ def extract_marked_rst( if not extracted_rst: return None if UNIX_NEWLINE in extracted_rst["rst_text"]: - rst_text = utils.remove_leading_sequences(extracted_rst["rst_text"], ["*"]) + rst_text = utils.remove_leading_sequences( + extracted_rst["rst_text"], + self.analyse_config.marked_rst_config.leading_sequences, + ) else: rst_text = extracted_rst["rst_text"] lineno = src_comment.node.start_point.row + extracted_rst["row_offset"] + 1 diff --git a/src/sphinx_codelinks/config.py b/src/sphinx_codelinks/config.py index 9f51422..a026aae 100644 --- a/src/sphinx_codelinks/config.py +++ b/src/sphinx_codelinks/config.py @@ -85,6 +85,11 @@ def field_names(cls) -> set[str]: default="@endrst", metadata={"schema": {"type": "string"}} ) """Chars sequence to indicate the end of the rst text.""" + leading_sequences: list[str] = field( + default_factory=lambda: ["*"], + metadata={"schema": {"type": "array", "items": {"type": "string"}}}, + ) + """List of leading sequences to be stripped from each line of the rst text.""" @classmethod def get_schema(cls, name: str) -> dict[str, Any] | None: # type: 
ignore[explicit-any] diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark new file mode 100644 index 0000000..ab197b0 --- /dev/null +++ b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark @@ -0,0 +1,31 @@ +start: directive + +directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _WS title? _NEWLINE options_block? content_block? + +title: TITLE + +options_block: option+ + +option: INDENT ":" OPTION_NAME ":" _WS? OPTION_VALUE? _NEWLINE + +content_block: _NEWLINE content_line+ + +content_line: INDENT TEXT _NEWLINE + +OPTION_NAME: /[a-zA-Z0-9_-]+/ + +OPTION_VALUE: /[^\n]+/ + +NAME: /[a-zA-Z0-9_-]+/ + +TITLE: /[^\n]+/ + +TEXT: /.+/ + +_NEWLINE: /\r?\n/ + +_WS: /[ \t]+/ + +INDENT_DIRECTIVE: /[ \t]+/ + +INDENT: " " diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py b/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py new file mode 100644 index 0000000..3f373e6 --- /dev/null +++ b/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py @@ -0,0 +1,88 @@ +"""Test script for RST directive Lark parser.""" + +# ruff: noqa: N802 +from pathlib import Path + +from lark import Lark, Transformer, v_args + + +@v_args(inline=True) +class DirectiveTransformer(Transformer): + def NAME(self, tok): + return str(tok) + + def TITLE(self, tok): + return str(tok).strip() + + def OPTION_NAME(self, tok): + return str(tok) + + def OPTION_VALUE(self, tok): + return str(tok).strip() + + def TEXT(self, tok): + return str(tok) + + def INDENT(self, tok): + """Return the length of the indent.""" + return len(str(tok)) + + def title(self, title): + return {"title": title} + + def option(self, _indent, name, value=None): + return (name, value) + + def options_block(self, *options): + return {"options": dict(options)} + + def content_line( + self, + _indent, + text, + ): + return text + + def content_block(self, *lines): + # items is list of lines + return {"content": "\n".join(lines)} + + def directive(self, name, *optionals): + 
# NAME,, optional title/options/content + need = {"type": name} + for item in optionals: + if "title" in item: + need["title"] = item["title"] + elif "options" in item: + need["options"] = item["options"] + elif "content" in item: + need["content"] = item["content"] + + return need + + +def get_parser() -> Lark: + """Get the Lark parser for RST directives.""" + + # Load the grammar + grammar_path = Path(__file__).parent / "sn_rst.lark" + grammar = grammar_path.read_text() + + parser = Lark( + grammar, + start="directive", + parser="lalr", + propagate_positions=True, + maybe_placeholders=False, + ) + + return parser + + +def parse_rst(text: str) -> dict: + """Parse the given RST directive text and return the parsed data.""" + parser = get_parser() + tree = parser.parse(text) + transformer = DirectiveTransformer() + result = transformer.transform(tree) + return result diff --git a/tests/data/dcdc/charge/demo_2.cpp b/tests/data/dcdc/charge/demo_2.cpp index dc2b941..cd797e2 100644 --- a/tests/data/dcdc/charge/demo_2.cpp +++ b/tests/data/dcdc/charge/demo_2.cpp @@ -33,9 +33,9 @@ } /** - * @brief Function with a rst blocks. + * @rst * .. impl:: Feature G - Data loss prevention - * + * @endrst * Some description here. 
* [[ IMPL_main_demo2, main func in demo_2]] */ diff --git a/tests/data/sphinx/src_trace.toml b/tests/data/sphinx/src_trace.toml index c19e368..c36b326 100644 --- a/tests/data/sphinx/src_trace.toml +++ b/tests/data/sphinx/src_trace.toml @@ -16,6 +16,8 @@ exclude = ["dcdc/src/ubt/ubt.cpp"] include = ["**/*.cpp", "**/*.hpp"] gitignore = true +[codelinks.projects.dcdc.analyse] +get_rst = true [codelinks.projects.dcdc.analyse.oneline_comment_style] start_sequence = "[[" diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py new file mode 100644 index 0000000..457283c --- /dev/null +++ b/tests/test_rst_parser.py @@ -0,0 +1,58 @@ +import pytest + +from sphinx_codelinks.sphinx_extension.sn_rst_parser import parse_rst + + +@pytest.mark.parametrize( + ("text", "expected"), + [ + ( + ".. req:: title1\n", + {"type": "req", "title": "title1"}, + ), + ( + ".. impl:: User Authentication\n :status: open\n :priority: high\n", + { + "type": "impl", + "title": "User Authentication", + "options": {"status": "open", "priority": "high"}, + }, + ), + ( + ".. impl:: Data Processing\n\n This is the implementation content.\n It spans multiple lines.\n", + { + "type": "impl", + "title": "Data Processing", + "content": "This is the implementation content.\nIt spans multiple lines.", + }, + ), + ( + ".. spec:: API Specification\n :version: 1.0\n :author: Dev Team\n\n This specification defines the REST API endpoints.\n", + { + "type": "spec", + "title": "API Specification", + "options": {"version": "1.0", "author": "Dev Team"}, + "content": "This specification defines the REST API endpoints.", + }, + ), + ( + ".. test:: Test Case\n :status:\n :priority: low\n", + { + "type": "test", + "title": "Test Case", + "options": {"status": None, "priority": "low"}, + }, + ), + ( + ".. 
impl:: Feature #123: Export\n :status: in-progress\n", + { + "type": "impl", + "title": "Feature #123: Export", + "options": {"status": "in-progress"}, + }, + ), + ], +) +def test_sn_rst_parser(text: str, expected: dict): + result = parse_rst(text) + assert result == expected From d140e3e52939fd4a69c6398f0dc9d5b98e0b4b84 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Mon, 3 Nov 2025 18:41:23 +0100 Subject: [PATCH 02/25] update parser --- .../sphinx_extension/sn_rst.lark | 24 +++++++----- .../sphinx_extension/sn_rst_parser.py | 39 ++++++++++++++++--- tests/test_rst_parser.py | 20 +++++++++- 3 files changed, 66 insertions(+), 17 deletions(-) diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark index ab197b0..5157057 100644 --- a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark +++ b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark @@ -1,31 +1,37 @@ start: directive -directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _WS title? _NEWLINE options_block? content_block? +directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _WS title_block? directive_block? -title: TITLE +title_block: TEXT_NO_COLUMN _NEWLINE multi_lines_title* | _NEWLINE multi_lines_title+ + +multi_lines_title: INDENT TEXT_NO_COLUMN _NEWLINE + +directive_block: options_block (_NEWLINE content_block)? | _NEWLINE content_block options_block: option+ -option: INDENT ":" OPTION_NAME ":" _WS? OPTION_VALUE? _NEWLINE +option: INDENT OPTION_NAME _WS? OPTION_VALUE? 
_NEWLINE -content_block: _NEWLINE content_line+ +content_block: content_line+ content_line: INDENT TEXT _NEWLINE -OPTION_NAME: /[a-zA-Z0-9_-]+/ +INDENT: " " + +OPTION_NAME: /:[a-zA-Z0-9_-]+:/ OPTION_VALUE: /[^\n]+/ NAME: /[a-zA-Z0-9_-]+/ -TITLE: /[^\n]+/ +TEXT_NO_COLUMN: /(?!.*:[a-zA-Z0-9_-]+:)[^\r\n]+/ -TEXT: /.+/ +TEXT: /[^\r\n]+/ + +NEWLINE_IN_CONTENT: /\r?\n/ _NEWLINE: /\r?\n/ _WS: /[ \t]+/ INDENT_DIRECTIVE: /[ \t]+/ - -INDENT: " " diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py b/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py index 3f373e6..3e5e26c 100644 --- a/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py +++ b/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py @@ -15,7 +15,7 @@ def TITLE(self, tok): return str(tok).strip() def OPTION_NAME(self, tok): - return str(tok) + return str(tok).replace(":", "").strip() def OPTION_VALUE(self, tok): return str(tok).strip() @@ -23,12 +23,21 @@ def OPTION_VALUE(self, tok): def TEXT(self, tok): return str(tok) + def TEXT_NO_COLUMN(self, tok): + return str(tok) + def INDENT(self, tok): """Return the length of the indent.""" return len(str(tok)) - def title(self, title): - return {"title": title} + def NEWLINE_IN_CONTENT(self, tok): + return str(tok) + + def multi_lines_title(self, *title_line): + return title_line[1] + + def title_block(self, *title): + return {"title": " ".join(title)} def option(self, _indent, name, value=None): return (name, value) @@ -36,21 +45,39 @@ def option(self, _indent, name, value=None): def options_block(self, *options): return {"options": dict(options)} - def content_line( + def first_line( self, _indent, text, ): - return text + return text.rstrip() + + def content_line(self, *line): + if len(line) == 1: + # it's a NEWLINE_IN_CONTENT + return line[0].rstrip() + else: + # it's an indented TEXT + return line[1].rstrip() def content_block(self, *lines): # items is list of lines return {"content": "\n".join(lines)} + def directive_block(self, *blocks): + 
return blocks + def directive(self, name, *optionals): # NAME,, optional title/options/content need = {"type": name} + # flaten optionals + flatten_optionals = [] for item in optionals: + if isinstance(item, tuple): + flatten_optionals.extend(item) + else: + flatten_optionals.append(item) + for item in flatten_optionals: if "title" in item: need["title"] = item["title"] elif "options" in item: @@ -71,7 +98,7 @@ def get_parser() -> Lark: parser = Lark( grammar, start="directive", - parser="lalr", + parser="earley", propagate_positions=True, maybe_placeholders=False, ) diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 457283c..4ea49de 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -6,10 +6,26 @@ @pytest.mark.parametrize( ("text", "expected"), [ + ( + ".. req:: ", + {"type": "req"}, + ), + ( + ".. req:: no newline", + {"type": "req", "title": "no newline"}, + ), ( ".. req:: title1\n", {"type": "req", "title": "title1"}, ), + ( + ".. req:: multi-line title1\n still title2\n still title3\n", + {"type": "req", "title": "multi-line title1 still title2 still title3"}, + ), + ( + ".. req:: \n multi-line title1\n still title2\n still title3\n", + {"type": "req", "title": "multi-line title1 still title2 still title3"}, + ), ( ".. impl:: User Authentication\n :status: open\n :priority: high\n", { @@ -19,10 +35,10 @@ }, ), ( - ".. impl:: Data Processing\n\n This is the implementation content.\n It spans multiple lines.\n", + ".. 
impl:: no options but content\n\n This is the implementation content.\n It spans multiple lines.\n", { "type": "impl", - "title": "Data Processing", + "title": "no options but content", "content": "This is the implementation content.\nIt spans multiple lines.", }, ), From c46bb4b635c4faf1b146095324cc8c55cdbfa2bd Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Mon, 3 Nov 2025 18:43:58 +0100 Subject: [PATCH 03/25] update grammar --- src/sphinx_codelinks/sphinx_extension/sn_rst.lark | 2 +- tests/test_rst_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark index 5157057..6f005db 100644 --- a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark +++ b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark @@ -1,6 +1,6 @@ start: directive -directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _WS title_block? directive_block? +directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _NEWLINE? (_WS title_block)? directive_block? title_block: TEXT_NO_COLUMN _NEWLINE multi_lines_title* | _NEWLINE multi_lines_title+ diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 4ea49de..64bec54 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -7,7 +7,7 @@ ("text", "expected"), [ ( - ".. req:: ", + ".. 
req::\n", {"type": "req"}, ), ( From b2fa418c12442f6a55da28335f11513c26e9d73d Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Mon, 3 Nov 2025 21:34:53 +0100 Subject: [PATCH 04/25] update --- .../sphinx_extension/sn_rst.lark | 4 +- .../sphinx_extension/sn_rst_parser.py | 15 ++-- tests/test_rst_parser.py | 89 +++++++++++++++++-- 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark index 6f005db..9cb4a95 100644 --- a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark +++ b/src/sphinx_codelinks/sphinx_extension/sn_rst.lark @@ -14,7 +14,7 @@ option: INDENT OPTION_NAME _WS? OPTION_VALUE? _NEWLINE content_block: content_line+ -content_line: INDENT TEXT _NEWLINE +content_line: INDENT TEXT _NEWLINE | _NEWLINE INDENT: " " @@ -30,7 +30,7 @@ TEXT: /[^\r\n]+/ NEWLINE_IN_CONTENT: /\r?\n/ -_NEWLINE: /\r?\n/ +_NEWLINE: /[ \t]*\r?\n/ _WS: /[ \t]+/ diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py b/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py index 3e5e26c..85d74e2 100644 --- a/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py +++ b/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py @@ -24,7 +24,7 @@ def TEXT(self, tok): return str(tok) def TEXT_NO_COLUMN(self, tok): - return str(tok) + return str(tok).strip() def INDENT(self, tok): """Return the length of the indent.""" @@ -36,8 +36,8 @@ def NEWLINE_IN_CONTENT(self, tok): def multi_lines_title(self, *title_line): return title_line[1] - def title_block(self, *title): - return {"title": " ".join(title)} + def title_block(self, *titles): + return {"title": " ".join(titles)} def option(self, _indent, name, value=None): return (name, value) @@ -45,14 +45,9 @@ def option(self, _indent, name, value=None): def options_block(self, *options): return {"options": dict(options)} - def first_line( - self, - _indent, - text, - ): - return text.rstrip() - def content_line(self, *line): + if not line: 
+ return "" if len(line) == 1: # it's a NEWLINE_IN_CONTENT return line[0].rstrip() diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 64bec54..b376bc1 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -6,34 +6,36 @@ @pytest.mark.parametrize( ("text", "expected"), [ + # Minimal directive - only type, no title/options/content ( ".. req::\n", {"type": "req"}, ), - ( - ".. req:: no newline", - {"type": "req", "title": "no newline"}, - ), + # Simple inline title on same line as directive marker ( ".. req:: title1\n", {"type": "req", "title": "title1"}, ), + # Inline title + indented continuation lines (TEXT_NO_COLUMN concatenates multi-line titles) ( ".. req:: multi-line title1\n still title2\n still title3\n", {"type": "req", "title": "multi-line title1 still title2 still title3"}, ), + # Title entirely on indented lines (title_block alternative: _NEWLINE multi_lines_title+) ( ".. req:: \n multi-line title1\n still title2\n still title3\n", {"type": "req", "title": "multi-line title1 still title2 still title3"}, ), + # Indented title stops at option line (TEXT_NO_COLUMN rejects :option: pattern) ( - ".. impl:: User Authentication\n :status: open\n :priority: high\n", + ".. req:: \n multi-line title1\n still title2\n :option:\n", { - "type": "impl", - "title": "User Authentication", - "options": {"status": "open", "priority": "high"}, + "type": "req", + "title": "multi-line title1 still title2", + "options": {"option": None}, }, ), + # Title + content block with blank line separator (directive_block content path) ( ".. impl:: no options but content\n\n This is the implementation content.\n It spans multiple lines.\n", { @@ -42,6 +44,7 @@ "content": "This is the implementation content.\nIt spans multiple lines.", }, ), + # Title + options + content (complete directive_block: options_block + _NEWLINE content_block) ( ".. 
spec:: API Specification\n :version: 1.0\n :author: Dev Team\n\n This specification defines the REST API endpoints.\n", { @@ -51,6 +54,7 @@ "content": "This specification defines the REST API endpoints.", }, ), + # Empty option values (OPTION_VALUE? optional in option rule) ( ".. test:: Test Case\n :status:\n :priority: low\n", { @@ -59,6 +63,7 @@ "options": {"status": None, "priority": "low"}, }, ), + # Title with special characters - single colons allowed (only :word: pattern forbidden) ( ".. impl:: Feature #123: Export\n :status: in-progress\n", { @@ -67,6 +72,74 @@ "options": {"status": "in-progress"}, }, ), + # Trailing spaces in title trimmed (_NEWLINE: /[ \t]*\r?\n/ consumes whitespace) + ( + ".. req:: title with spaces \n", + {"type": "req", "title": "title with spaces"}, + ), + # Inline title continuation + options (multi_lines_title* stops at :option: line) + ( + ".. impl:: Initial title\n continuation of title\n :status: active\n", + { + "type": "impl", + "title": "Initial title continuation of title", + "options": {"status": "active"}, + }, + ), + # Multiple options with empty values (option+ with multiple OPTION_VALUE? None) + ( + ".. test:: Test\n :tag1:\n :tag2:\n :tag3:\n", + { + "type": "test", + "title": "Test", + "options": {"tag1": None, "tag2": None, "tag3": None}, + }, + ), + # Option value with special chars (OPTION_VALUE: /[^\n]+/ accepts URLs, commas, hyphens) + ( + ".. impl:: Feature\n :link: https://example.com/issue#123\n :tags: feature,ui,high-priority\n", + { + "type": "impl", + "title": "Feature", + "options": { + "link": "https://example.com/issue#123", + "tags": "feature,ui,high-priority", + }, + }, + ), + # Option value containing colons (colons inside OPTION_VALUE are allowed) + ( + ".. req:: Requirement\n :time: 10:30 AM\n", + { + "type": "req", + "title": "Requirement", + "options": {"time": "10:30 AM"}, + }, + ), + # Unicode characters in title (NAME, TITLE, TEXT_NO_COLUMN handle non-ASCII) + ( + ".. 
req:: Función de exportación 导出功能\n", + {"type": "req", "title": "Función de exportación 导出功能"}, + ), + # Content with blank lines between paragraphs (multiple newlines in content block) + ( + ".. impl:: Feature\n\n First paragraph.\n Still first paragraph.\n\n Second paragraph here.\n Still second paragraph.\n", + { + "type": "impl", + "title": "Feature", + "content": "First paragraph.\nStill first paragraph.\n\nSecond paragraph here.\nStill second paragraph.", + }, + ), + # Complex case: inline title + continuation + options + content (all grammar paths) + ( + ".. spec:: Main Title\n Title continuation\n :version: 2.0\n :author: Team\n\n Content paragraph one.\n Content paragraph two.\n", + { + "type": "spec", + "title": "Main Title Title continuation", + "options": {"version": "2.0", "author": "Team"}, + "content": "Content paragraph one.\nContent paragraph two.", + }, + ), ], ) def test_sn_rst_parser(text: str, expected: dict): From db25b2eb150287817a2f06509bfbebf18088884d Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Mon, 3 Nov 2025 23:39:56 +0100 Subject: [PATCH 05/25] update docs --- docs/source/components/analyse.rst | 79 +++++++++++++++++-- docs/source/components/configuration.rst | 3 + docs/source/components/directive.rst | 2 +- docs/src_trace.toml | 1 + .../sn_rst_parser.py | 0 tests/test_rst_parser.py | 2 +- 6 files changed, 77 insertions(+), 10 deletions(-) rename src/sphinx_codelinks/{sphinx_extension => analyse}/sn_rst_parser.py (100%) diff --git a/docs/source/components/analyse.rst b/docs/source/components/analyse.rst index b57007d..9aeab5b 100644 --- a/docs/source/components/analyse.rst +++ b/docs/source/components/analyse.rst @@ -40,7 +40,7 @@ Use simplified comment patterns to define **Sphinx-Needs** items without complex Marked RST Blocks ~~~~~~~~~~~~~~~~~ -Embed complete reStructuredText content within source code comments for rich documentation that can be extracted and processed. 
+Embed complete reStructuredText directives which is extracted and parsed as the grammar of **Sphinx-Needs** definition blocks. Limitations ----------- @@ -55,6 +55,8 @@ Extraction Examples The following examples are configured with :ref:`the analyse configuration `, +.. _`analyse_need_id_refs`: + Sphinx-Needs ID References ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -118,6 +120,8 @@ Below is an example of a C++ source file containing need ID references and the c - ``marker`` - The marker string used for identification - ``type`` - Type of extraction ("need-id-refs") +.. _`analyse_rst`: + Marked RST Blocks ~~~~~~~~~~~~~~~~~ @@ -126,6 +130,7 @@ This example demonstrates how the analyse extracts RST blocks from comments. .. tabs:: .. code-tab:: cpp + :linenos: #include @@ -190,6 +195,8 @@ The module supports both multi-line and single-line RST blocks: - **Multi-line blocks**: Use ``@rst`` and ``@endrst`` on separate lines - **Single-line blocks**: Use ``@rst content @endrst`` on the same line +.. _`analyse_oneline`: + One-line Needs -------------- @@ -199,14 +206,70 @@ For comprehensive information about one-line needs configuration and usage, see **Basic Example:** -.. code-block:: c - // @Function Implementation, IMPL_001, impl, [REQ_001, REQ_002] +.. tabs:: + + .. code-tab:: c + :linenos: + + // @Function Foo, IMPL_1 + void foo() {} + + // @Function Bar, IMPL_2 + void bar() {} -This single comment line creates a complete **Sphinx-Needs** item equivalent to: + // @Function Baz\, as I want it, IMPL_3 + void baz() {} -.. code-block:: rst + .. code-tab:: json - .. 
impl:: Function Implementation - :id: IMPL_001 - :links: REQ_001, REQ_002 + [ + { + "filepath": "/home/jui-wen/git_repo/ub/sphinx-codelinks/tests/data/oneline_comment_default/default_oneliners.c", + "remote_url": "https://github.com/useblocks/sphinx-codelinks/blob/951e40e7845f06d5cfc4ca20ebb984308fdaf985/tests/data/oneline_comment_default/default_oneliners.c#L1", + "source_map": { + "start": { "row": 0, "column": 4 }, + "end": { "row": 0, "column": 24 } + }, + "tagged_scope": "void foo() {}", + "need": { + "title": "Function Foo", + "id": "IMPL_1", + "type": "impl", + "links": [] + }, + "type": "need" + }, + { + "filepath": "/home/jui-wen/git_repo/ub/sphinx-codelinks/tests/data/oneline_comment_default/default_oneliners.c", + "remote_url": "https://github.com/useblocks/sphinx-codelinks/blob/951e40e7845f06d5cfc4ca20ebb984308fdaf985/tests/data/oneline_comment_default/default_oneliners.c#L4", + "source_map": { + "start": { "row": 3, "column": 4 }, + "end": { "row": 3, "column": 24 } + }, + "tagged_scope": "void bar() {}", + "need": { + "title": "Function Bar", + "id": "IMPL_2", + "type": "impl", + "links": [] + }, + "type": "need" + }, + { + "filepath": "/home/jui-wen/git_repo/ub/sphinx-codelinks/tests/data/oneline_comment_default/default_oneliners.c", + "remote_url": "https://github.com/useblocks/sphinx-codelinks/blob/951e40e7845f06d5cfc4ca20ebb984308fdaf985/tests/data/oneline_comment_default/default_oneliners.c#L7", + "source_map": { + "start": { "row": 6, "column": 4 }, + "end": { "row": 6, "column": 39 } + }, + "tagged_scope": "void baz() {}", + "need": { + "title": "Function Baz, as I want it", + "id": "IMPL_3", + "type": "impl", + "links": [] + }, + "type": "need" + } + ] diff --git a/docs/source/components/configuration.rst b/docs/source/components/configuration.rst index e69908c..afb93a8 100644 --- a/docs/source/components/configuration.rst +++ b/docs/source/components/configuration.rst @@ -373,6 +373,7 @@ Configures how **Sphinx-CodeLinks** analyse source 
files to extract markers from [codelinks.projects.my_project.analyse.marked_rst] start_sequence = "@rst" end_sequence = "@endrst" + link_options = ["links"] get_need_id_refs ^^^^^^^^^^^^^^^^ @@ -491,8 +492,10 @@ Configuration for marked RST block extraction. [codelinks.projects.my_project.analyse.marked_rst] start_sequence = "@rst" end_sequence = "@endrst" + link_options = ["links"] **Configuration fields:** - ``start_sequence`` (``str``) - Marker that begins an RST block - ``end_sequence`` (``str``) - Marker that ends an RST block +- ``link_options`` (``list[str]``) - List of option names whose values should be treated as Sphinx-Needs link fields diff --git a/docs/source/components/directive.rst b/docs/source/components/directive.rst index d73bd98..99d777b 100644 --- a/docs/source/components/directive.rst +++ b/docs/source/components/directive.rst @@ -3,7 +3,7 @@ Directive ========= -.. attention:: ``src-trace`` directive currently only supports :ref:`one-line need definition `. +.. attention:: ``src-trace`` directive do NOT supports :ref:`Sphinx-Needs ID Refs `. ``src-trace`` Directive generates Sphinx-Needs items from source code comments. 
There are two ways to define need items in source code: diff --git a/docs/src_trace.toml b/docs/src_trace.toml index cc7a6a3..5cbb068 100644 --- a/docs/src_trace.toml +++ b/docs/src_trace.toml @@ -15,6 +15,7 @@ src_dir = "../tests/data/dcdc" # Relative path from this TOML config to the sour [codelinks.projects.dcdc.analyse] get_need_id_refs = false get_oneline_needs = true +get_rst = true [codelinks.projects.dcdc.analyse.oneline_comment_style] # Configuration for oneline comment style diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py similarity index 100% rename from src/sphinx_codelinks/sphinx_extension/sn_rst_parser.py rename to src/sphinx_codelinks/analyse/sn_rst_parser.py diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index b376bc1..55cd653 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -1,6 +1,6 @@ import pytest -from sphinx_codelinks.sphinx_extension.sn_rst_parser import parse_rst +from sphinx_codelinks.analyse.sn_rst_parser import parse_rst @pytest.mark.parametrize( From 7cfba7998f2dc73784c9c08fb3f4e0f088986d83 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 4 Nov 2025 10:25:40 +0100 Subject: [PATCH 06/25] updated --- src/sphinx_codelinks/analyse/analyse.py | 26 +++++++++++++++++++ src/sphinx_codelinks/analyse/models.py | 1 + .../{sphinx_extension => analyse}/sn_rst.lark | 0 src/sphinx_codelinks/analyse/sn_rst_parser.py | 9 ++++--- tests/test_rst_parser.py | 25 +++++++++++++++++- 5 files changed, 57 insertions(+), 4 deletions(-) rename src/sphinx_codelinks/{sphinx_extension => analyse}/sn_rst.lark (100%) diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index 215589b..2bfcd21 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Any, TypedDict +from lark import UnexpectedInput from tree_sitter 
import Node as TreeSitterNode from sphinx_codelinks.analyse import utils @@ -21,6 +22,7 @@ OnelineParserInvalidWarning, oneline_parser, ) +from sphinx_codelinks.analyse.sn_rst_parser import parse_rst from sphinx_codelinks.config import ( UNIX_NEWLINE, OneLineCommentStyle, @@ -76,6 +78,7 @@ def __init__( self.git_root if self.git_root else self.analyse_config.src_dir ) self.oneline_warnings: list[AnalyseWarning] = [] + self.rst_warnings: list[AnalyseWarning] = [] def get_src_strings(self) -> Generator[tuple[Path, bytes], Any, None]: # type: ignore[explicit-any] """Load source files and extract their content.""" @@ -303,6 +306,11 @@ def extract_marked_rst( "column": extracted_rst["end_idx"], }, } + resolved = parse_rst(rst_text) + if isinstance(resolved, UnexpectedInput): + self.handle_rst_warning(resolved, src_comment, rst_text) + return None + return MarkedRst( filepath, remote_url, @@ -310,6 +318,24 @@ def extract_marked_rst( src_comment, tagged_scope, rst_text, + resolved, + ) + + def handle_rst_warning( + self, warning: UnexpectedInput, src_comment: SourceComment, rst_text + ) -> None: + """Handle RST parsing warnings.""" + if not src_comment.source_file: + return + lineno = src_comment.node.start_point.row + warning.line + 1 + self.rst_warnings.append( + AnalyseWarning( + str(src_comment.source_file.filepath), + lineno, + f"{warning.get_context(rst_text)}\n{warning!s}", + MarkedContentType.rst, + "parsing_error", + ) ) def extract_marked_content(self) -> None: diff --git a/src/sphinx_codelinks/analyse/models.py b/src/sphinx_codelinks/analyse/models.py index 856f0a0..259add6 100644 --- a/src/sphinx_codelinks/analyse/models.py +++ b/src/sphinx_codelinks/analyse/models.py @@ -83,4 +83,5 @@ class OneLineNeed(Metadata): @dataclass class MarkedRst(Metadata): rst: str + need: dict[str, str | list[str]] | None = None type: MarkedContentType = field(init=False, default=MarkedContentType.rst) diff --git a/src/sphinx_codelinks/sphinx_extension/sn_rst.lark 
b/src/sphinx_codelinks/analyse/sn_rst.lark similarity index 100% rename from src/sphinx_codelinks/sphinx_extension/sn_rst.lark rename to src/sphinx_codelinks/analyse/sn_rst.lark diff --git a/src/sphinx_codelinks/analyse/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py index 85d74e2..d7d02b9 100644 --- a/src/sphinx_codelinks/analyse/sn_rst_parser.py +++ b/src/sphinx_codelinks/analyse/sn_rst_parser.py @@ -3,7 +3,7 @@ # ruff: noqa: N802 from pathlib import Path -from lark import Lark, Transformer, v_args +from lark import Lark, Transformer, UnexpectedInput, v_args @v_args(inline=True) @@ -101,10 +101,13 @@ def get_parser() -> Lark: return parser -def parse_rst(text: str) -> dict: +def parse_rst(text: str) -> dict | UnexpectedInput: """Parse the given RST directive text and return the parsed data.""" parser = get_parser() - tree = parser.parse(text) + try: + tree = parser.parse(text) + except UnexpectedInput as e: + return e transformer = DirectiveTransformer() result = transformer.transform(tree) return result diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 55cd653..f6dcbae 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -1,3 +1,4 @@ +from lark import UnexpectedInput import pytest from sphinx_codelinks.analyse.sn_rst_parser import parse_rst @@ -142,6 +143,28 @@ ), ], ) -def test_sn_rst_parser(text: str, expected: dict): +def test_sn_rst_parser_positive(text: str, expected: dict): result = parse_rst(text) assert result == expected + + +@pytest.mark.parametrize( + ("text"), + [ + # Missing directive type + (".. :: Missing type\n"), + # Improper indentation (option line not indented) + (".. impl:: Title\n:option: value\n"), + # Content without blank line separator + (".. spec:: Title\n :option: value\n Content without blank line.\n"), + # Invalid characters in directive type + (".. re@q:: Invalid type\n"), + # Title line that looks like an option + (".. 
req:: :notanoption:\n"), + # Content block without proper indentation + (".. impl:: Title\nContent not indented properly.\n"), + ], +) +def test_sn_rst_parser_negative(text: str): + warning = parse_rst(text) + assert isinstance(warning, UnexpectedInput) From 3f889c9047358e5bae0d04d590b1eee52a3351cb Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 4 Nov 2025 14:36:21 +0100 Subject: [PATCH 07/25] only allow inline argument --- src/sphinx_codelinks/analyse/sn_rst.lark | 8 ++-- src/sphinx_codelinks/analyse/sn_rst_parser.py | 9 ++-- tests/test_rst_parser.py | 42 ++----------------- 3 files changed, 10 insertions(+), 49 deletions(-) diff --git a/src/sphinx_codelinks/analyse/sn_rst.lark b/src/sphinx_codelinks/analyse/sn_rst.lark index 9cb4a95..a557c1a 100644 --- a/src/sphinx_codelinks/analyse/sn_rst.lark +++ b/src/sphinx_codelinks/analyse/sn_rst.lark @@ -1,12 +1,10 @@ start: directive -directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _NEWLINE? (_WS title_block)? directive_block? +directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _NEWLINE? directive_block? -title_block: TEXT_NO_COLUMN _NEWLINE multi_lines_title* | _NEWLINE multi_lines_title+ +directive_block: inline_title _NEWLINE | inline_title _NEWLINE options_block (_NEWLINE content_block)? | inline_title _NEWLINE _NEWLINE content_block | _NEWLINE content_block -multi_lines_title: INDENT TEXT_NO_COLUMN _NEWLINE - -directive_block: options_block (_NEWLINE content_block)? 
| _NEWLINE content_block +inline_title: TEXT_NO_COLUMN options_block: option+ diff --git a/src/sphinx_codelinks/analyse/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py index d7d02b9..51496af 100644 --- a/src/sphinx_codelinks/analyse/sn_rst_parser.py +++ b/src/sphinx_codelinks/analyse/sn_rst_parser.py @@ -33,11 +33,8 @@ def INDENT(self, tok): def NEWLINE_IN_CONTENT(self, tok): return str(tok) - def multi_lines_title(self, *title_line): - return title_line[1] - - def title_block(self, *titles): - return {"title": " ".join(titles)} + def inline_title(self, text): + return {"title": text.strip()} # strip leading/trailing whitespace def option(self, _indent, name, value=None): return (name, value) @@ -93,7 +90,7 @@ def get_parser() -> Lark: parser = Lark( grammar, start="directive", - parser="earley", + parser="lalr", propagate_positions=True, maybe_placeholders=False, ) diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index f6dcbae..769800d 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -17,25 +17,6 @@ ".. req:: title1\n", {"type": "req", "title": "title1"}, ), - # Inline title + indented continuation lines (TEXT_NO_COLUMN concatenates multi-line titles) - ( - ".. req:: multi-line title1\n still title2\n still title3\n", - {"type": "req", "title": "multi-line title1 still title2 still title3"}, - ), - # Title entirely on indented lines (title_block alternative: _NEWLINE multi_lines_title+) - ( - ".. req:: \n multi-line title1\n still title2\n still title3\n", - {"type": "req", "title": "multi-line title1 still title2 still title3"}, - ), - # Indented title stops at option line (TEXT_NO_COLUMN rejects :option: pattern) - ( - ".. req:: \n multi-line title1\n still title2\n :option:\n", - { - "type": "req", - "title": "multi-line title1 still title2", - "options": {"option": None}, - }, - ), # Title + content block with blank line separator (directive_block content path) ( ".. 
impl:: no options but content\n\n This is the implementation content.\n It spans multiple lines.\n", @@ -78,15 +59,6 @@ ".. req:: title with spaces \n", {"type": "req", "title": "title with spaces"}, ), - # Inline title continuation + options (multi_lines_title* stops at :option: line) - ( - ".. impl:: Initial title\n continuation of title\n :status: active\n", - { - "type": "impl", - "title": "Initial title continuation of title", - "options": {"status": "active"}, - }, - ), # Multiple options with empty values (option+ with multiple OPTION_VALUE? None) ( ".. test:: Test\n :tag1:\n :tag2:\n :tag3:\n", @@ -131,16 +103,6 @@ "content": "First paragraph.\nStill first paragraph.\n\nSecond paragraph here.\nStill second paragraph.", }, ), - # Complex case: inline title + continuation + options + content (all grammar paths) - ( - ".. spec:: Main Title\n Title continuation\n :version: 2.0\n :author: Team\n\n Content paragraph one.\n Content paragraph two.\n", - { - "type": "spec", - "title": "Main Title Title continuation", - "options": {"version": "2.0", "author": "Team"}, - "content": "Content paragraph one.\nContent paragraph two.", - }, - ), ], ) def test_sn_rst_parser_positive(text: str, expected: dict): @@ -163,6 +125,10 @@ def test_sn_rst_parser_positive(text: str, expected: dict): (".. req:: :notanoption:\n"), # Content block without proper indentation (".. impl:: Title\nContent not indented properly.\n"), + # multiline title not allowed + (".. req:: Title line one\n Title line two\n"), + # non-inline/indented title not allowed + (".. 
req:: \n Title line one\n"), ], ) def test_sn_rst_parser_negative(text: str): From 320402ef7bf359e3652a03b2ea2d32eaa71ac57b Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 4 Nov 2025 14:57:40 +0100 Subject: [PATCH 08/25] update --- docs/source/components/rst_parser.rst | 17 +++++++++++++++++ docs/source/index.rst | 1 + tests/test_rst_parser.py | 10 ++++++++++ 3 files changed, 28 insertions(+) create mode 100644 docs/source/components/rst_parser.rst diff --git a/docs/source/components/rst_parser.rst b/docs/source/components/rst_parser.rst new file mode 100644 index 0000000..018b806 --- /dev/null +++ b/docs/source/components/rst_parser.rst @@ -0,0 +1,17 @@ +Simplified reStructuredText Parser +================================== + +The :ref:`analyse ` module provides a simplified parser for reStructuredText (reST) directives using the ``Lark`` parsing library. +It is designed to only parse the RST text extracted by :ref:`RST markers `, focusing on specific directive types and their associated options and content. +By doing so, the parser avoids the complexity of a full reST parser while still capturing the essential structure needed for Sphinx-Needs integration from the source code. + +The parser doesn't have the Sphinx-Needs directive validation logic. It only checks the syntax of the reST directives and extracts the directive type, argument, options, and content. + +**Limitations** + +Since the parser does not implement the full reST specification, it has some limitations: + +- Comments in the RST text are not supported. +- The parser expects proper indentation for options and content blocks. +- It only takes an inline directive argument/title (no multi-line arguments/titles). +- It only takes inline option values (no multi-line option values).
diff --git a/docs/source/index.rst b/docs/source/index.rst index 94ff269..d95c40d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -62,6 +62,7 @@ Contents components/cli components/configuration components/directive + components/rst_parser components/oneline components/analyse components/discover diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 769800d..df761d1 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -103,6 +103,16 @@ "content": "First paragraph.\nStill first paragraph.\n\nSecond paragraph here.\nStill second paragraph.", }, ), + # comment in content + ( + ".. spec:: comment in content\n :option: value\n\n .. commentline\n Content without blank line.\n", + { + "type": "spec", + "title": "comment in content", + "options": {"option": "value"}, + "content": ".. commentline\nContent without blank line.", + }, + ), ], ) def test_sn_rst_parser_positive(text: str, expected: dict): From 62e685c134275c864c7927b97cc66764ce768d30 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 10:58:20 +0100 Subject: [PATCH 09/25] update --- docs/source/components/configuration.rst | 4 + docs/source/components/rst_parser.rst | 2 +- src/sphinx_codelinks/analyse/analyse.py | 14 ++- src/sphinx_codelinks/analyse/sn_rst_parser.py | 92 ++++++++++++++++--- src/sphinx_codelinks/config.py | 16 +++- 5 files changed, 111 insertions(+), 17 deletions(-) diff --git a/docs/source/components/configuration.rst b/docs/source/components/configuration.rst index afb93a8..453b68e 100644 --- a/docs/source/components/configuration.rst +++ b/docs/source/components/configuration.rst @@ -492,10 +492,14 @@ Configuration for marked RST block extraction. 
[codelinks.projects.my_project.analyse.marked_rst] start_sequence = "@rst" end_sequence = "@endrst" + strip_leading_sequences = ["*"] + indented_spaces = 3 link_options = ["links"] **Configuration fields:** - ``start_sequence`` (``str``) - Marker that begins an RST block - ``end_sequence`` (``str``) - Marker that ends an RST block +- ``strip_leading_sequences`` (``list[str]``) - List of leading sequences to strip from each line of the RST text between the markers +- ``indented_spaces`` (``int``) - Number of leading spaces to consider as indentation in the RST text - ``link_options`` (``list[str]``) - List of option names whose values should be treated as Sphinx-Needs link fields diff --git a/docs/source/components/rst_parser.rst b/docs/source/components/rst_parser.rst index 018b806..e5ae8fc 100644 --- a/docs/source/components/rst_parser.rst +++ b/docs/source/components/rst_parser.rst @@ -12,6 +12,6 @@ The parser does't have the Sphinx-Needs directive validation logic. It only chec Since the parser does not implement the full reST specification, it has some limitations: - Comments in the RST text are not supported. -- The parser expects proper indentation for options and content blocks. +- The parser expects consistent indentation for options and content blocks. - It only takes an inline directive argument/title (no multi-line arguments/titles). - It only takes inline option values (no multi-line option values). 
diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index 2bfcd21..cee743f 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -282,7 +282,7 @@ def extract_marked_rst( if UNIX_NEWLINE in extracted_rst["rst_text"]: rst_text = utils.remove_leading_sequences( extracted_rst["rst_text"], - self.analyse_config.marked_rst_config.leading_sequences, + self.analyse_config.marked_rst_config.strip_leading_sequences, ) else: rst_text = extracted_rst["rst_text"] @@ -306,10 +306,20 @@ def extract_marked_rst( "column": extracted_rst["end_idx"], }, } - resolved = parse_rst(rst_text) + resolved = parse_rst( + rst_text, self.analyse_config.marked_rst_config.indented_spaces + ) if isinstance(resolved, UnexpectedInput): self.handle_rst_warning(resolved, src_comment, rst_text) return None + if resolved: + # convert link options values to list + for key, val in resolved.items(): + if ( + key in self.analyse_config.marked_rst_config.link_options + and isinstance(val, str) + ): + resolved[key] = [val.split(",")] return MarkedRst( filepath, diff --git a/src/sphinx_codelinks/analyse/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py index 51496af..be7d5c2 100644 --- a/src/sphinx_codelinks/analyse/sn_rst_parser.py +++ b/src/sphinx_codelinks/analyse/sn_rst_parser.py @@ -1,10 +1,48 @@ """Test script for RST directive Lark parser.""" # ruff: noqa: N802 -from pathlib import Path - +# TODO: Not sure Lark is the right tool for this job since the it has a few limitations such as lack of support for dynamic indentation levels while extracting leading spaces in content. +# Consider switching to Visitor instead of Transformer to have more control on resolving the tree or implement a custom parser if needed. from lark import Lark, Transformer, UnexpectedInput, v_args +LARK_GRAMMER = rf""" +start: directive + +directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _NEWLINE? directive_block? 
+ +directive_block: inline_title _NEWLINE | inline_title _NEWLINE options_block (_NEWLINE content_block)? | inline_title _NEWLINE _NEWLINE content_block | _NEWLINE content_block + +inline_title: TEXT_NO_COLUMN + +options_block: option+ + +option: INDENT OPTION_NAME _WS? OPTION_VALUE? _NEWLINE + +content_block: content_line+ + +content_line: INDENT TEXT _NEWLINE | _NEWLINE + +INDENT: {" " * 3} + +OPTION_NAME: /:[a-zA-Z0-9_-]+:/ + +OPTION_VALUE: /[^\n]+/ + +NAME: /[a-zA-Z0-9_-]+/ + +TEXT_NO_COLUMN: /(?!.*:[a-zA-Z0-9_-]+:)[^\r\n]+/ + +TEXT: /[^\r\n]+/ + +NEWLINE_IN_CONTENT: /\r?\n/ + +_NEWLINE: /[ \t]*\r?\n/ + +_WS: /[ \t]+/ + +INDENT_DIRECTIVE: /[ \t]+/ +""" + @v_args(inline=True) class DirectiveTransformer(Transformer): @@ -80,12 +118,46 @@ def directive(self, name, *optionals): return need -def get_parser() -> Lark: - """Get the Lark parser for RST directives.""" - +def parse_rst(text: str, num_spaces: int = 3) -> dict | UnexpectedInput: + """Parse the given RST directive text and return the parsed data.""" # Load the grammar - grammar_path = Path(__file__).parent / "sn_rst.lark" - grammar = grammar_path.read_text() + grammar = rf""" +start: directive + +directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _NEWLINE? directive_block? + +directive_block: inline_title _NEWLINE | inline_title _NEWLINE options_block (_NEWLINE content_block)? | inline_title _NEWLINE _NEWLINE content_block | _NEWLINE content_block + +inline_title: TEXT_NO_COLUMN + +options_block: option+ + +option: INDENT OPTION_NAME _WS? OPTION_VALUE? 
_NEWLINE + +content_block: content_line+ + +content_line: INDENT TEXT _NEWLINE | _NEWLINE + +INDENT: "{" " * num_spaces}" + +OPTION_NAME: /:[a-zA-Z0-9_-]+:/ + +OPTION_VALUE: /[^\n]+/ + +NAME: /[a-zA-Z0-9_-]+/ + +TEXT_NO_COLUMN: /(?!.*:[a-zA-Z0-9_-]+:)[^\r\n]+/ + +TEXT: /[^\r\n]+/ + +NEWLINE_IN_CONTENT: /\r?\n/ + +_NEWLINE: /[ \t]*\r?\n/ + +_WS: /[ \t]+/ + +INDENT_DIRECTIVE: /[ \t]+/ +""" parser = Lark( grammar, @@ -95,12 +167,6 @@ def get_parser() -> Lark: maybe_placeholders=False, ) - return parser - - -def parse_rst(text: str) -> dict | UnexpectedInput: - """Parse the given RST directive text and return the parsed data.""" - parser = get_parser() try: tree = parser.parse(text) except UnexpectedInput as e: diff --git a/src/sphinx_codelinks/config.py b/src/sphinx_codelinks/config.py index a026aae..e8b74a7 100644 --- a/src/sphinx_codelinks/config.py +++ b/src/sphinx_codelinks/config.py @@ -71,6 +71,9 @@ def check_schema(self) -> list[str]: class MarkedRstConfigType(TypedDict): start_sequence: str end_sequence: str + strip_leading_sequences: list[str] + indented_spaces: int + link_options: list[str] @dataclass @@ -81,16 +84,27 @@ def field_names(cls) -> set[str]: start_sequence: str = field(default="@rst", metadata={"schema": {"type": "string"}}) """Chars sequence to indicate the start of the rst text.""" + end_sequence: str = field( default="@endrst", metadata={"schema": {"type": "string"}} ) """Chars sequence to indicate the end of the rst text.""" - leading_sequences: list[str] = field( + + strip_leading_sequences: list[str] = field( default_factory=lambda: ["*"], metadata={"schema": {"type": "array", "items": {"type": "string"}}}, ) """List of leading sequences to be stripped from each line of the rst text.""" + indented_spaces: int = field(default=3, metadata={"schema": {"type": "integer"}}) + """The number of leading spaces to be considered as indentation in the rst text.""" + + link_options: list[str] = field( + default_factory=lambda: ["links"], + 
metadata={"schema": {"type": "array", "items": {"type": "string"}}}, + ) + """List of options in the rst directive that contain links.""" + @classmethod def get_schema(cls, name: str) -> dict[str, Any] | None: # type: ignore[explicit-any] _field = next(_field for _field in fields(cls) if _field.name is name) From c3f227b260b8a0345853121f4c7bc1e05231e05d Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 12:12:55 +0100 Subject: [PATCH 10/25] update --- src/sphinx_codelinks/analyse/analyse.py | 3 +- src/sphinx_codelinks/analyse/sn_rst_parser.py | 49 ++----------------- tests/test_rst_parser.py | 15 ++++++ 3 files changed, 22 insertions(+), 45 deletions(-) diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index cee743f..b6b92ae 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -311,7 +311,8 @@ def extract_marked_rst( ) if isinstance(resolved, UnexpectedInput): self.handle_rst_warning(resolved, src_comment, rst_text) - return None + resolved = None + if resolved: # convert link options values to list for key, val in resolved.items(): diff --git a/src/sphinx_codelinks/analyse/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py index be7d5c2..8819c47 100644 --- a/src/sphinx_codelinks/analyse/sn_rst_parser.py +++ b/src/sphinx_codelinks/analyse/sn_rst_parser.py @@ -5,51 +5,13 @@ # Consider switching to Visitor instead of Transformer to have more control on resolving the tree or implement a custom parser if needed. from lark import Lark, Transformer, UnexpectedInput, v_args -LARK_GRAMMER = rf""" -start: directive - -directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _NEWLINE? directive_block? - -directive_block: inline_title _NEWLINE | inline_title _NEWLINE options_block (_NEWLINE content_block)? 
| inline_title _NEWLINE _NEWLINE content_block | _NEWLINE content_block - -inline_title: TEXT_NO_COLUMN - -options_block: option+ - -option: INDENT OPTION_NAME _WS? OPTION_VALUE? _NEWLINE - -content_block: content_line+ - -content_line: INDENT TEXT _NEWLINE | _NEWLINE - -INDENT: {" " * 3} - -OPTION_NAME: /:[a-zA-Z0-9_-]+:/ - -OPTION_VALUE: /[^\n]+/ - -NAME: /[a-zA-Z0-9_-]+/ - -TEXT_NO_COLUMN: /(?!.*:[a-zA-Z0-9_-]+:)[^\r\n]+/ - -TEXT: /[^\r\n]+/ - -NEWLINE_IN_CONTENT: /\r?\n/ - -_NEWLINE: /[ \t]*\r?\n/ - -_WS: /[ \t]+/ - -INDENT_DIRECTIVE: /[ \t]+/ -""" - @v_args(inline=True) class DirectiveTransformer(Transformer): def NAME(self, tok): return str(tok) - def TITLE(self, tok): + def TEXT_NO_COLUMN(self, tok): return str(tok).strip() def OPTION_NAME(self, tok): @@ -61,9 +23,6 @@ def OPTION_VALUE(self, tok): def TEXT(self, tok): return str(tok) - def TEXT_NO_COLUMN(self, tok): - return str(tok).strip() - def INDENT(self, tok): """Return the length of the indent.""" return len(str(tok)) @@ -72,7 +31,7 @@ def NEWLINE_IN_CONTENT(self, tok): return str(tok) def inline_title(self, text): - return {"title": text.strip()} # strip leading/trailing whitespace + return {"title": text} # strip leading/trailing whitespace def option(self, _indent, name, value=None): return (name, value) @@ -166,7 +125,9 @@ def parse_rst(text: str, num_spaces: int = 3) -> dict | UnexpectedInput: propagate_positions=True, maybe_placeholders=False, ) - + if "\n" not in text: + # to make the grammar happy for single line input + text = text.strip() + "\n" try: tree = parser.parse(text) except UnexpectedInput as e: diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index df761d1..0922cd7 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -7,11 +7,26 @@ @pytest.mark.parametrize( ("text", "expected"), [ + # Minimal directive - only type, no title/options/content + ( + ".. req::", + {"type": "req"}, + ), + # Minimal directive - with trailing space no newline + ( + ".. 
req:: ", + {"type": "req"}, + ), # Minimal directive - only type, no title/options/content ( ".. req::\n", {"type": "req"}, ), + # Simple inline title with trailing spaces without newline + ( + ".. req:: title1 ", + {"type": "req", "title": "title1"}, + ), # Simple inline title on same line as directive marker ( ".. req:: title1\n", From 4a10cd2d086f5840f198210571ac81d63908bbce Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 12:20:37 +0100 Subject: [PATCH 11/25] update test --- ...test_analyse[src_dir0-src_paths0].anchors.json | 15 +++++++++++++-- tests/data/marked_rst/dummy_1.cpp | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json index 1904a62..55b9e90 100644 --- a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json +++ b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json @@ -14,6 +14,13 @@ }, "tagged_scope": "void dummy_func1(){\n //...\n }", "rst": ".. impl:: implement dummy function 1\n :id: IMPL_71\n", + "need": { + "type": "impl", + "title": "implement dummy function 1", + "options": { + "id": "IMPL_71" + } + }, "type": "rst" }, { @@ -26,11 +33,15 @@ }, "end": { "row": 13, - "column": 40 + "column": 41 } }, "tagged_scope": "int main() {\n std::cout << \"Starting demo_1...\" << std::endl;\n dummy_func1();\n std::cout << \"Demo_1 finished.\" << std::endl;\n return 0;\n }", - "rst": "..impl:: implement main function ", + "rst": ".. impl:: implement main function ", + "need": { + "type": "impl", + "title": "implement main function" + }, "type": "rst" }, { diff --git a/tests/data/marked_rst/dummy_1.cpp b/tests/data/marked_rst/dummy_1.cpp index 1fed979..1d8bdc9 100644 --- a/tests/data/marked_rst/dummy_1.cpp +++ b/tests/data/marked_rst/dummy_1.cpp @@ -10,7 +10,7 @@ //... 
} - // @rst..impl:: implement main function @endrst + // @rst.. impl:: implement main function @endrst int main() { std::cout << "Starting demo_1..." << std::endl; dummy_func1(); From 774c20f5939b6472207fd065c4140d01724641c1 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 15:28:13 +0100 Subject: [PATCH 12/25] add preprocess --- tests/test_rst_parser.py | 63 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 0922cd7..9617e66 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -1,7 +1,7 @@ from lark import UnexpectedInput import pytest -from sphinx_codelinks.analyse.sn_rst_parser import parse_rst +from sphinx_codelinks.analyse.sn_rst_parser import parse_rst, preprocess_rst @pytest.mark.parametrize( @@ -159,3 +159,64 @@ def test_sn_rst_parser_positive(text: str, expected: dict): def test_sn_rst_parser_negative(text: str): warning = parse_rst(text) assert isinstance(warning, UnexpectedInput) + + +@pytest.mark.parametrize( + ("text", "expected"), + [ + # No leading chars - text is already properly aligned + ( + ".. req:: Title\n", + ".. req:: Title\n", + ), + # Single line without newline - adds newline and strips leading/trailing spaces + ( + ".. req:: Title", + ".. req:: Title\n", + ), + # Single line with 3 leading spaces - strips and adds newline + ( + " .. req:: Title", + ".. req:: Title\n", + ), + # Multi-line with consistent indentation - no change + ( + ".. req:: Title\n :option: value\n", + ".. req:: Title\n :option: value\n", + ), + # Text with 3 leading spaces before directive marker + ( + " .. req:: 3 leading spaces\n :option: value\n", + ".. 
req:: 3 leading spaces\n :option: value\n", + ), + # Empty string - returned as-is (edge case) + ( + "", + "", + ), + # Only whitespace - no directive marker, returned as-is + ( + " ", + " ", + ), + # No directive marker found - returned as-is, no newline added + ( + "This is not a directive", + "This is not a directive", + ), + # Directive marker not at expected position - handles gracefully + ( + "Some text .. req:: Title\n", + ".. req:: Title\n", + ), + # Multi-line with trailing spaces + ( + ".. req:: Title \n :option: value \n", + ".. req:: Title \n :option: value \n", + ), + ], +) +def test_preprocess_rst(text: str, expected: str): + """Test preprocess_rst function normalizes input for parser.""" + result = preprocess_rst(text) + assert result == expected From ca938fbfaf1be0f382474b73057450a98e3eda62 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 15:40:48 +0100 Subject: [PATCH 13/25] adapt src-trace --- docs/ubproject.toml | 2 +- src/sphinx_codelinks/analyse/analyse.py | 10 ++-- src/sphinx_codelinks/analyse/sn_rst_parser.py | 33 +++++++++++-- .../sphinx_extension/directives/src_trace.py | 46 +++++++++++-------- tests/data/dcdc/charge/demo_2.cpp | 1 + 5 files changed, 64 insertions(+), 28 deletions(-) diff --git a/docs/ubproject.toml b/docs/ubproject.toml index a4e497b..04edfbd 100644 --- a/docs/ubproject.toml +++ b/docs/ubproject.toml @@ -4,7 +4,7 @@ ignore = ["block.title_line"] [needs] -id_required = true +id_required = false [parse.extend_directives.src-trace] argument = false diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index b6b92ae..6aa7cba 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -313,14 +313,18 @@ def extract_marked_rst( self.handle_rst_warning(resolved, src_comment, rst_text) resolved = None - if resolved: - # convert link options values to list - for key, val in resolved.items(): + if resolved and "options" in resolved: + #
flatten options + for key, val in resolved["options"].items(): if ( key in self.analyse_config.marked_rst_config.link_options and isinstance(val, str) ): + # convert link options values to list resolved[key] = [val.split(",")] + else: + resolved[key] = val + del resolved["options"] return MarkedRst( filepath, diff --git a/src/sphinx_codelinks/analyse/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py index 8819c47..d49fbc6 100644 --- a/src/sphinx_codelinks/analyse/sn_rst_parser.py +++ b/src/sphinx_codelinks/analyse/sn_rst_parser.py @@ -6,6 +6,10 @@ from lark import Lark, Transformer, UnexpectedInput, v_args +class PreProcessError(Exception): + """Custom error for preprocess issues.""" + + @v_args(inline=True) class DirectiveTransformer(Transformer): def NAME(self, tok): @@ -118,6 +122,8 @@ def parse_rst(text: str, num_spaces: int = 3) -> dict | UnexpectedInput: INDENT_DIRECTIVE: /[ \t]+/ """ + processed_text = preprocess_rst(text) + parser = Lark( grammar, start="directive", @@ -125,13 +131,32 @@ def parse_rst(text: str, num_spaces: int = 3) -> dict | UnexpectedInput: propagate_positions=True, maybe_placeholders=False, ) - if "\n" not in text: - # to make the grammar happy for single line input - text = text.strip() + "\n" + try: - tree = parser.parse(text) + tree = parser.parse(processed_text) except UnexpectedInput as e: return e transformer = DirectiveTransformer() result = transformer.transform(tree) return result + + +def preprocess_rst(text: str) -> str: + """Only process valid RST directive text by stripping leading spaces before the directive marker.""" + if not text: + # empty string, return as is + return text + lines = text.splitlines(keepends=True) + idx_directive = lines[0].find( + ".." 
+ ) # expect the first line is the start of the RST directive + if idx_directive == -1: + # do nothing and let parser to handle it + return text + + stripped_lines = [line[idx_directive:] for line in lines] + stripped_text = "".join(stripped_lines) + if "\n" not in text: + # to make the grammar happy for single line input + stripped_text = stripped_text.strip() + "\n" + return stripped_text diff --git a/src/sphinx_codelinks/sphinx_extension/directives/src_trace.py b/src/sphinx_codelinks/sphinx_extension/directives/src_trace.py index 3e46e51..5313970 100644 --- a/src/sphinx_codelinks/sphinx_extension/directives/src_trace.py +++ b/src/sphinx_codelinks/sphinx_extension/directives/src_trace.py @@ -12,7 +12,7 @@ from sphinx_needs.utils import add_doc # type: ignore[import-untyped] from sphinx_codelinks.analyse.analyse import SourceAnalyse -from sphinx_codelinks.analyse.models import OneLineNeed +from sphinx_codelinks.analyse.models import MarkedRst, NeedIdRefs, OneLineNeed from sphinx_codelinks.config import ( CodeLinksConfig, CodeLinksProjectConfigType, @@ -43,15 +43,18 @@ def get_rel_path(doc_path: Path, code_path: Path, base_dir: Path) -> tuple[Path, def generate_str_link_name( - oneline_need: OneLineNeed, + marked_content: OneLineNeed | MarkedRst, target_filepath: Path, dirs: dict[str, Path], local: bool = False, ) -> str: - if oneline_need.source_map["start"]["row"] == oneline_need.source_map["end"]["row"]: - lineno = f"L{oneline_need.source_map['start']['row'] + 1}" + if ( + marked_content.source_map["start"]["row"] + == marked_content.source_map["end"]["row"] + ): + lineno = f"L{marked_content.source_map['start']['row'] + 1}" else: - lineno = f"L{oneline_need.source_map['start']['row'] + 1}-L{oneline_need.source_map['end']['row'] + 1}" + lineno = f"L{marked_content.source_map['start']['row'] + 1}-L{marked_content.source_map['end']['row'] + 1}" # url = str(target_filepath.relative_to(target_dir)) + f"#{lineno}" if local: url = str(target_filepath) + f"#{lineno}" 
@@ -238,12 +241,15 @@ def render_needs( ) -> list[nodes.Node]: """Render the needs from the virtual docs""" rendered_needs: list[nodes.Node] = [] - for oneline_need in src_analyse.oneline_needs: + for marked_content in src_analyse.all_marked_content: + if isinstance(marked_content, NeedIdRefs): + # skip need_id_refs type + continue # # add source files into the dependency # # https://www.sphinx-doc.org/en/master/extdev/envapi.html#sphinx.environment.BuildEnvironment.note_dependency - # self.env.note_dependency(str(oneline_need.filepath.resolve())) + # self.env.note_dependency(str(marked_content.filepath.resolve())) - filepath = src_analyse.analyse_config.src_dir / oneline_need.filepath + filepath = src_analyse.analyse_config.src_dir / marked_content.filepath target_filepath = dirs["target_dir"] / filepath.relative_to(dirs["src_dir"]) # mapping between lineno and need link in docs for local url @@ -263,21 +269,21 @@ def render_needs( Path(self.env.docname), target_filepath, dirs["out_dir"] ) local_link_name = generate_str_link_name( - oneline_need, + marked_content, local_rel_path, dirs, local=True, ) if remote_url_field: remote_link_name = generate_str_link_name( - oneline_need, target_filepath, dirs, local=False + marked_content, target_filepath, dirs, local=False ) - if oneline_need.need: + if marked_content.need: # render needs from one-line marker kwargs: dict[str, str | list[str]] = { field_name: field_value - for field_name, field_value in oneline_need.need.items() + for field_name, field_value in marked_content.need.items() if field_name not in [ "title", @@ -290,27 +296,27 @@ def render_needs( if remote_url_field and remote_link_name is not None: kwargs[remote_url_field] = remote_link_name - oneline_needs: list[nodes.Node] = add_need( + a_need: list[nodes.Node] = add_need( app=self.env.app, # The Sphinx application object state=self.state, # The docutils state object docname=self.env.docname, # The current document name lineno=self.lineno, # The line number 
where the directive is used - need_type=str(oneline_need.need["type"]), # The type of the need - title=str(oneline_need.need["title"]), # The title of the need + need_type=str(marked_content.need["type"]), # The type of the need + title=str(marked_content.need["title"]), # The title of the need **cast(dict[str, Any], kwargs), # type: ignore[explicit-any] ) - rendered_needs.extend(oneline_needs) + rendered_needs.extend(a_need) if local_url_field: # save the mapping of need links and line numbers of source codes # for the later use in `html-collect-pages` if str(target_filepath) not in file_lineno_href.mappings: file_lineno_href.mappings[str(target_filepath)] = { - oneline_need.source_map["start"]["row"] - + 1: f"{docs_href}#{oneline_need.need['id']}" + marked_content.source_map["start"]["row"] + + 1: f"{docs_href}#{marked_content.need['id']}" } else: file_lineno_href.mappings[str(target_filepath)][ - oneline_need.source_map["start"]["row"] + 1 - ] = f"{docs_href}#{oneline_need.need['id']}" + marked_content.source_map["start"]["row"] + 1 + ] = f"{docs_href}#{marked_content.need['id']}" return rendered_needs diff --git a/tests/data/dcdc/charge/demo_2.cpp b/tests/data/dcdc/charge/demo_2.cpp index cd797e2..578116e 100644 --- a/tests/data/dcdc/charge/demo_2.cpp +++ b/tests/data/dcdc/charge/demo_2.cpp @@ -35,6 +35,7 @@ /** * @rst * .. impl:: Feature G - Data loss prevention + * :id: IMPL_main_demo2_rst * @endrst * Some description here. 
* [[ IMPL_main_demo2, main func in demo_2]] From 4d1bee6c3b08bd2af3493567109566f53903b0e9 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 15:41:56 +0100 Subject: [PATCH 14/25] updated snapshot --- .../test_analyse[src_dir0-src_paths0].anchors.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json index 55b9e90..a1295c9 100644 --- a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json +++ b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json @@ -17,9 +17,7 @@ "need": { "type": "impl", "title": "implement dummy function 1", - "options": { - "id": "IMPL_71" - } + "id": "IMPL_71" }, "type": "rst" }, From 3a82d168fd0dafa1831b0fe422ab45f39bc39036 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 15:43:14 +0100 Subject: [PATCH 15/25] updated snapshot --- ...est_build_html[sphinx_project0-source_code0].doctree.xml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/__snapshots__/test_src_trace/test_build_html[sphinx_project0-source_code0].doctree.xml b/tests/__snapshots__/test_src_trace/test_build_html[sphinx_project0-source_code0].doctree.xml index 62f8cf8..028de79 100644 --- a/tests/__snapshots__/test_src_trace/test_build_html[sphinx_project0-source_code0].doctree.xml +++ b/tests/__snapshots__/test_src_trace/test_build_html[sphinx_project0-source_code0].doctree.xml @@ -9,12 +9,14 @@ - - + + + + From f0f085dad99054de672bc652cb273426e8c0cd60 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 16:26:03 +0100 Subject: [PATCH 16/25] update --- docs/source/components/rst_parser.rst | 2 +- src/sphinx_codelinks/analyse/analyse.py | 2 +- src/sphinx_codelinks/analyse/sn_rst.lark | 35 ------------------------ 3 files changed, 2 insertions(+), 37 deletions(-) delete mode 100644 
src/sphinx_codelinks/analyse/sn_rst.lark diff --git a/docs/source/components/rst_parser.rst b/docs/source/components/rst_parser.rst index e5ae8fc..c080c46 100644 --- a/docs/source/components/rst_parser.rst +++ b/docs/source/components/rst_parser.rst @@ -5,7 +5,7 @@ The :ref:`analyse ` module provides a simplified parser for reStructure It is designed to only parse the RST text extracted by :ref:`RST markers `, focusing on specific directive types and their associated options and content. By doing so, the parser avoids the complexity of a full reST parser while still capturing the essential structure needed for Sphinx-Needs integration from the source code. -The parser does't have the Sphinx-Needs directive validation logic. It only checks the syntax of the reST directives and extracts the directive type, argument, options, and content. +The parser does't have the Sphinx-Needs directive validation logic. It only checks the syntax of the RST directives and extracts the directive type, argument, options, and content. **Limitations** diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index 6aa7cba..28c9f56 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -314,7 +314,7 @@ def extract_marked_rst( resolved = None if resolved and "options" in resolved: - # flatten options + # flatten options and convert link options values to list if needed for key, val in resolved["options"].items(): if ( key in self.analyse_config.marked_rst_config.link_options diff --git a/src/sphinx_codelinks/analyse/sn_rst.lark b/src/sphinx_codelinks/analyse/sn_rst.lark deleted file mode 100644 index a557c1a..0000000 --- a/src/sphinx_codelinks/analyse/sn_rst.lark +++ /dev/null @@ -1,35 +0,0 @@ -start: directive - -directive: INDENT_DIRECTIVE? ".." _WS NAME "::" _NEWLINE? directive_block? - -directive_block: inline_title _NEWLINE | inline_title _NEWLINE options_block (_NEWLINE content_block)? 
| inline_title _NEWLINE _NEWLINE content_block | _NEWLINE content_block - -inline_title: TEXT_NO_COLUMN - -options_block: option+ - -option: INDENT OPTION_NAME _WS? OPTION_VALUE? _NEWLINE - -content_block: content_line+ - -content_line: INDENT TEXT _NEWLINE | _NEWLINE - -INDENT: " " - -OPTION_NAME: /:[a-zA-Z0-9_-]+:/ - -OPTION_VALUE: /[^\n]+/ - -NAME: /[a-zA-Z0-9_-]+/ - -TEXT_NO_COLUMN: /(?!.*:[a-zA-Z0-9_-]+:)[^\r\n]+/ - -TEXT: /[^\r\n]+/ - -NEWLINE_IN_CONTENT: /\r?\n/ - -_NEWLINE: /[ \t]*\r?\n/ - -_WS: /[ \t]+/ - -INDENT_DIRECTIVE: /[ \t]+/ From bd71726f04c944ca07e4acf5f0feff66cef349f6 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Wed, 5 Nov 2025 16:53:49 +0100 Subject: [PATCH 17/25] update docs --- docs/source/components/configuration.rst | 2 ++ docs/source/development/change_log.rst | 30 ++++++++++++++++++++++++ docs/source/development/roadmap.rst | 3 +-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/docs/source/components/configuration.rst b/docs/source/components/configuration.rst index 453b68e..e6797b6 100644 --- a/docs/source/components/configuration.rst +++ b/docs/source/components/configuration.rst @@ -479,6 +479,8 @@ Configuration for Sphinx-Needs ID reference extraction. - ``markers`` (``list[str]``) - List of marker strings that identify need ID references +.. _analyse_marked_rst: + analyse.marked_rst ^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/development/change_log.rst b/docs/source/development/change_log.rst index 97aa764..5984888 100644 --- a/docs/source/development/change_log.rst +++ b/docs/source/development/change_log.rst @@ -3,6 +3,36 @@ Changelog ========= +.. _`unreleased`: + +Unreleased +----------- + +New and Improved +................ + +- ✨ Added RST directive parser for ``analyse`` module to resolve the marked RST into Sphinx-Needs related data. + + Marked RST blocks can be resolved from the marked RST text in source files using the simplified RST directive parser. 
+ The resolved RST blocks will be dumped into the JSON output along with other extracted markers. + To make the parser more stable, 3 new configuration options are added to control the parsing behavior: + + - ``leading_sequences``: List of leading character sequences to strip from each line. + + This option allows users to specify a list of leading character sequences (e.g., ``*``, ``-``) that should be stripped + from each line of the marked RST block before parsing. + This is useful for handling comment styles that include leading characters in multi-line comments. + + - ``indented_spaces``: Number of spaces that indicate an indented block. + - ``link_options``: List of option names whose values should be converted to lists when parsed. + +- ✨ Enhanced the ``src-trace`` directive to support creating need items from marked RST blocks in addition to one-line need definitions. + + The ``src-trace`` directive can now create Sphinx-Needs need items from both one-line need definitions and marked RST blocks. + This provides more flexibility for defining needs in source code comments. + +:Released: 02.10.2025 + .. _`release:1.1.0`: 1.1.0 diff --git a/docs/source/development/roadmap.rst b/docs/source/development/roadmap.rst index e40d490..fae1cde 100644 --- a/docs/source/development/roadmap.rst +++ b/docs/source/development/roadmap.rst @@ -17,7 +17,6 @@ Configuration Files Source Code Parsing ------------------- -- Introduce a configurable option to strip leading characters (e.g., ``*``) from commented RST blocks. - Enrich tagged scopes with additional metadata. - Extend language support by adding parsers for more comment styles, including but not limited to: @@ -28,5 +27,5 @@ Source Code Parsing Defining Needs in Source Code ----------------------------- -- Introduce flexible ways to define ``Sphinx-Needs`` items in source code, such as using raw RST text and multi-line comments. 
+- Introduce flexible ways to define ``Sphinx-Needs`` items in source code, such as custom multi-line need comments. - Implement a feature to export needs defined in source code to a ``needs.json`` file, improving CI workflows and portability. From 9f8369da5157a98bfad373e1ab11bea659bcb783 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Mon, 10 Nov 2025 14:54:36 +0100 Subject: [PATCH 18/25] make mypy happy --- src/sphinx_codelinks/analyse/analyse.py | 27 +++++++++++-------- src/sphinx_codelinks/analyse/sn_rst_parser.py | 18 ++++++++++--- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index 28c9f56..cb7771a 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -22,7 +22,7 @@ OnelineParserInvalidWarning, oneline_parser, ) -from sphinx_codelinks.analyse.sn_rst_parser import parse_rst +from sphinx_codelinks.analyse.sn_rst_parser import NeedDirectiveType, parse_rst from sphinx_codelinks.config import ( UNIX_NEWLINE, OneLineCommentStyle, @@ -306,25 +306,30 @@ def extract_marked_rst( "column": extracted_rst["end_idx"], }, } - resolved = parse_rst( + need_directive: None | NeedDirectiveType | UnexpectedInput = None + need_directive = parse_rst( rst_text, self.analyse_config.marked_rst_config.indented_spaces ) - if isinstance(resolved, UnexpectedInput): - self.handle_rst_warning(resolved, src_comment, rst_text) - resolved = None + if isinstance(need_directive, UnexpectedInput): + self.handle_rst_warning(need_directive, src_comment, rst_text) + need_directive = None - if resolved and "options" in resolved: + resolved: dict[str, str | list[str]] = ( + {key: val for key, val in need_directive.items() if key != "options"} # type: ignore[misc] # type `object` is filtered out by the condition + if need_directive + else {} + ) + if need_directive and "options" in need_directive: # flatten options and convert link options values to list 
if needed - for key, val in resolved["options"].items(): + for key, val in need_directive["options"].items(): # type: ignore[union-attr] # options existence checked if ( key in self.analyse_config.marked_rst_config.link_options and isinstance(val, str) ): # convert link options values to list - resolved[key] = [val.split(",")] + resolved[key] = val.split(",") else: resolved[key] = val - del resolved["options"] return MarkedRst( filepath, @@ -333,11 +338,11 @@ def extract_marked_rst( src_comment, tagged_scope, rst_text, - resolved, + resolved if resolved else None, ) def handle_rst_warning( - self, warning: UnexpectedInput, src_comment: SourceComment, rst_text + self, warning: UnexpectedInput, src_comment: SourceComment, rst_text: str ) -> None: """Handle RST parsing warnings.""" if not src_comment.source_file: diff --git a/src/sphinx_codelinks/analyse/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py index d49fbc6..db7098c 100644 --- a/src/sphinx_codelinks/analyse/sn_rst_parser.py +++ b/src/sphinx_codelinks/analyse/sn_rst_parser.py @@ -3,6 +3,9 @@ # ruff: noqa: N802 # TODO: Not sure Lark is the right tool for this job since the it has a few limitations such as lack of support for dynamic indentation levels while extracting leading spaces in content. # Consider switching to Visitor instead of Transformer to have more control on resolving the tree or implement a custom parser if needed. 
+ +from typing import TypedDict + from lark import Lark, Transformer, UnexpectedInput, v_args @@ -10,8 +13,15 @@ class PreProcessError(Exception): """Custom error for preprocess issues.""" +class NeedDirectiveType(TypedDict, total=False): + type: str + title: str | None + options: dict[str, str] | None + content: str | None + + @v_args(inline=True) -class DirectiveTransformer(Transformer): +class DirectiveTransformer(Transformer): # type: ignore[type-arg] # disable type-arg due to lark Transformer generic issue def NAME(self, tok): return str(tok) @@ -64,7 +74,7 @@ def directive(self, name, *optionals): # NAME,, optional title/options/content need = {"type": name} # flaten optionals - flatten_optionals = [] + flatten_optionals: list[dict[str, str]] = [] for item in optionals: if isinstance(item, tuple): flatten_optionals.extend(item) @@ -81,7 +91,7 @@ def directive(self, name, *optionals): return need -def parse_rst(text: str, num_spaces: int = 3) -> dict | UnexpectedInput: +def parse_rst(text: str, num_spaces: int = 3) -> NeedDirectiveType | UnexpectedInput: """Parse the given RST directive text and return the parsed data.""" # Load the grammar grammar = rf""" @@ -137,7 +147,7 @@ def parse_rst(text: str, num_spaces: int = 3) -> dict | UnexpectedInput: except UnexpectedInput as e: return e transformer = DirectiveTransformer() - result = transformer.transform(tree) + result: NeedDirectiveType = transformer.transform(tree) return result From 884effdc2e75af1f030dff217754f2295e62d62b Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Mon, 10 Nov 2025 22:50:18 +0100 Subject: [PATCH 19/25] add TCs --- tests/conftest.py | 83 ++++++++++++++++++++++++++++++++++++++++ tests/test_analyse.py | 38 ++++++++++++++++++ tests/test_rst_parser.py | 10 +++++ 3 files changed, 131 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 47d6f78..9b1164d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,12 @@ +from collections.abc import Callable import json 
from pathlib import Path +from _pytest.mark import ParameterSet from docutils.nodes import document import pytest from syrupy.extensions.single_file import SingleFileSnapshotExtension, WriteMode +import yaml from sphinx_codelinks.config import OneLineCommentStyle @@ -101,3 +104,83 @@ def snapshot_marks(snapshot): Sanitize the reqif, to make the snapshots reproducible. """ return snapshot.with_defaults(extension_class=AnchorsSnapshotExtension) + + +def create_parameters( + *rel_paths: str, skip_files: None | list[str] = None +) -> list[ParameterSet]: + """Create parameters for a pytest param_file decorator.""" + paths: list[Path] = [] + for rel_path in rel_paths: + assert not Path(rel_path).is_absolute() + path = TEST_DIR.joinpath(rel_path) + if path.is_file(): + paths.append(path) + elif path.is_dir(): + paths.extend(path.glob("*.yaml")) + else: + raise FileNotFoundError(f"File / folder not found: {path}") + + if skip_files: + paths = [ + path for path in paths if str(path.relative_to(TEST_DIR)) not in skip_files + ] + + if not paths: + raise FileNotFoundError(f"No files found: {rel_paths}") + + if len(paths) == 1: + with paths[0].open(encoding="utf8") as f: + try: + data = yaml.safe_load(f) + except Exception as err: + raise OSError(f"Error loading {paths[0]}") from err + return [pytest.param(value, id=id) for id, value in data.items()] + else: + params: list[ParameterSet] = [] + for subpath in paths: + with subpath.open(encoding="utf8") as f: + try: + data = yaml.safe_load(f) + except Exception as err: + raise OSError(f"Error loading {subpath}") from err + for key, value in data.items(): + params.append( + pytest.param( + value, + id=f"{subpath.relative_to(TEST_DIR).with_suffix('')}-{key}", + ) + ) + return params + + +def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: + """Generate tests for a ``@pytest.mark.fixture_file`` decorator.""" + for marker in metafunc.definition.iter_markers(name="fixture_file"): + params = create_parameters(*marker.args, 
**marker.kwargs) + metafunc.parametrize(argnames="content", argvalues=params) + + +@pytest.fixture +def write_fixture_files() -> Callable[[Path, dict[str, str | list[Path]]], None]: + def _inner(tmp: Path, content: dict[str, str | list[Path]]) -> None: + section_file_mapping: dict[str, Path] = { + "ubproject": tmp / "ubproject.toml", + } + for section, file_path in section_file_mapping.items(): + if section in content: + if isinstance(content[section], str): + file_path.write_text(content[section], encoding="utf-8") # type: ignore[assignment] + else: + raise ValueError( + f"Unsupported content type for section '{section}': {type(content[section])}" + ) + src_paths: list[Path] = [] + for key, value in content.items(): + if key.startswith("dummy") and isinstance(value, str): + dummy_file_path = tmp / key + dummy_file_path.write_text(value, encoding="utf-8") + src_paths.append(dummy_file_path) + content["src_paths"] = src_paths + + return _inner diff --git a/tests/test_analyse.py b/tests/test_analyse.py index d9897cc..ef11fc8 100644 --- a/tests/test_analyse.py +++ b/tests/test_analyse.py @@ -1,5 +1,7 @@ +from collections.abc import Callable import json from pathlib import Path +from typing import Any import pytest @@ -126,3 +128,39 @@ def test_analyse_oneline_needs( for src_file in src_analyse.src_files: cnt_comments += len(src_file.src_comments) assert cnt_comments == result["num_comments"] + + +@pytest.mark.fixture_file("fixture_files/analyse_rst.yml") +def test_analyse_rst( + tmp_path: Path, + content: dict[str, Any], + write_fixture_files: Callable[[Path, dict[str, Any]], None], + snapshot_marks: str, +): + write_fixture_files(tmp_path, content) + + src_paths = [tmp_path / src_path for src_path in content["src_paths"]] + src_analyse_config = SourceAnalyseConfig( + src_files=src_paths, + src_dir=tmp_path, + get_need_id_refs=False, + get_oneline_needs=False, + get_rst=True, + ) + + src_analyse = SourceAnalyse(src_analyse_config) + src_analyse.run() + 
src_analyse.dump_marked_content(tmp_path) + dumped_content = tmp_path / "marked_content.json" + + # assert src_analyse.rst_warnings + assert dumped_content.exists() + + with dumped_content.open("r") as f: + marked_content = json.load(f) + # normalize filepath + for obj in marked_content: + obj["filepath"] = ( + Path(obj["filepath"]).relative_to(src_analyse_config.src_dir) + ).as_posix() + assert marked_content == snapshot_marks diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 9617e66..2228d44 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -214,6 +214,16 @@ def test_sn_rst_parser_negative(text: str): ".. req:: Title \n :option: value \n", ".. req:: Title \n :option: value \n", ), + # Multi-line with trailing spaces and content + ( + ".. req:: Title \n :option: value \n\n This is the content. \n", + ".. req:: Title \n :option: value \n\n This is the content.\n", + ), + # Multi-line with trailing and leading spaces and content + ( + " .. req:: Title \n :option: value \n\n This is the content. \n", + ".. 
req:: Title \n :option: value \n\n This is the content.\n", + ), ], ) def test_preprocess_rst(text: str, expected: str): From 75a47cbf3abb9b7f65a82fadbb1122e4a963a9e5 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 11 Nov 2025 08:40:18 +0100 Subject: [PATCH 20/25] update parser --- src/sphinx_codelinks/analyse/sn_rst_parser.py | 20 +++++++++----- ..._analyse[src_dir0-src_paths0].anchors.json | 6 ++--- ...yse_rst[multiline_rst_marker].anchors.json | 26 +++++++++++++++++++ ...alyse_rst[oneline_rst_marker].anchors.json | 23 ++++++++++++++++ tests/fixture_files/analyse_rst.yml | 21 +++++++++++++++ tests/test_rst_parser.py | 4 +-- 6 files changed, 89 insertions(+), 11 deletions(-) create mode 100644 tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json create mode 100644 tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json create mode 100644 tests/fixture_files/analyse_rst.yml diff --git a/src/sphinx_codelinks/analyse/sn_rst_parser.py b/src/sphinx_codelinks/analyse/sn_rst_parser.py index db7098c..378b5e6 100644 --- a/src/sphinx_codelinks/analyse/sn_rst_parser.py +++ b/src/sphinx_codelinks/analyse/sn_rst_parser.py @@ -8,6 +8,8 @@ from lark import Lark, Transformer, UnexpectedInput, v_args +from sphinx_codelinks.config import UNIX_NEWLINE + class PreProcessError(Exception): """Custom error for preprocess issues.""" @@ -152,11 +154,17 @@ def parse_rst(text: str, num_spaces: int = 3) -> NeedDirectiveType | UnexpectedI def preprocess_rst(text: str) -> str: - """Only process valid RST directive text by stripping leading spaces before the directive marker.""" + """Process valid RST directive text before parsing. + + The followings are processed: + - Stripe leading spaces before the directive marker to get relative indentations. + - Stripe trailing spaces at the end + - Ensure the text ends with a newline. 
+ """ if not text: # empty string, return as is return text - lines = text.splitlines(keepends=True) + lines = text.splitlines(keepends=False) idx_directive = lines[0].find( ".." ) # expect the first line is the start of the RST directive @@ -164,9 +172,9 @@ def preprocess_rst(text: str) -> str: # do nothing and let parser to handle it return text + # remove leading spaces for the relative indentation stripped_lines = [line[idx_directive:] for line in lines] - stripped_text = "".join(stripped_lines) - if "\n" not in text: - # to make the grammar happy for single line input - stripped_text = stripped_text.strip() + "\n" + stripped_text = UNIX_NEWLINE.join(stripped_lines) + # remove trailing spaces and make sure it ends with newline + stripped_text = stripped_text.strip() + "\n" return stripped_text diff --git a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json index a1295c9..4d08346 100644 --- a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json +++ b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json @@ -5,11 +5,11 @@ "source_map": { "start": { "row": 3, - "column": 8 + "column": 0 }, "end": { - "row": 3, - "column": 61 + "row": 5, + "column": 9 } }, "tagged_scope": "void dummy_func1(){\n //...\n }", diff --git a/tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json b/tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json new file mode 100644 index 0000000..ee222ba --- /dev/null +++ b/tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json @@ -0,0 +1,26 @@ +[ + { + "filepath": "dummy_1.c", + "remote_url": null, + "source_map": { + "start": { + "row": 2, + "column": 0 + }, + "end": { + "row": 7, + "column": 14 + } + }, + "tagged_scope": "int main() {\n return 0;\n}", + "rst": " .. 
impl:: implement main function\n :id: REQ_001\n :status: test\n\n This is content for the main function implementation.\n ", + "need": { + "type": "impl", + "title": "implement main function", + "content": "This is content for the main function implementation.", + "id": "REQ_001", + "status": "test" + }, + "type": "rst" + } +] \ No newline at end of file diff --git a/tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json b/tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json new file mode 100644 index 0000000..aa90b58 --- /dev/null +++ b/tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json @@ -0,0 +1,23 @@ +[ + { + "filepath": "dummy_1.c", + "remote_url": null, + "source_map": { + "start": { + "row": 1, + "column": 7 + }, + "end": { + "row": 1, + "column": 41 + } + }, + "tagged_scope": "int main() {\n return 0;\n}", + "rst": ".. impl:: implement main function ", + "need": { + "type": "impl", + "title": "implement main function" + }, + "type": "rst" + } +] \ No newline at end of file diff --git a/tests/fixture_files/analyse_rst.yml b/tests/fixture_files/analyse_rst.yml new file mode 100644 index 0000000..d05c4e7 --- /dev/null +++ b/tests/fixture_files/analyse_rst.yml @@ -0,0 +1,21 @@ +oneline_rst_marker: + dummy_1.c: | + // @rst.. impl:: implement main function @endrst + int main() { + return 0; + } + +multiline_rst_marker: + dummy_1.c: | + /* + @rst + .. impl:: implement main function + :id: REQ_001 + :status: test + + This is content for the main function implementation. + @endrst + */ + int main() { + return 0; + } diff --git a/tests/test_rst_parser.py b/tests/test_rst_parser.py index 2228d44..9fad1d7 100644 --- a/tests/test_rst_parser.py +++ b/tests/test_rst_parser.py @@ -212,7 +212,7 @@ def test_sn_rst_parser_negative(text: str): # Multi-line with trailing spaces ( ".. req:: Title \n :option: value \n", - ".. req:: Title \n :option: value \n", + ".. 
req:: Title \n :option: value\n", ), # Multi-line with trailing spaces and content ( @@ -222,7 +222,7 @@ def test_sn_rst_parser_negative(text: str): # Multi-line with trailing and leading spaces and content ( " .. req:: Title \n :option: value \n\n This is the content. \n", - ".. req:: Title \n :option: value \n\n This is the content.\n", + ".. req:: Title \n :option: value \n\n This is the content.\n", ), ], ) From 1187a51dfca5b84d9acd54e3891875f710ad33d8 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 11 Nov 2025 10:16:40 +0100 Subject: [PATCH 21/25] fix row in rst source map --- src/sphinx_codelinks/analyse/analyse.py | 21 +++++++++++++------ src/sphinx_codelinks/analyse/utils.py | 5 +++-- ..._analyse[src_dir0-src_paths0].anchors.json | 8 +++---- ...yse_rst[multiline_rst_marker].anchors.json | 4 ++-- ...alyse_rst[oneline_rst_marker].anchors.json | 4 ++-- 5 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index cb7771a..0215743 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -279,14 +279,23 @@ def extract_marked_rst( ) if not extracted_rst: return None + start_row = src_comment.node.start_point.row + extracted_rst["row_offset"] if UNIX_NEWLINE in extracted_rst["rst_text"]: rst_text = utils.remove_leading_sequences( extracted_rst["rst_text"], self.analyse_config.marked_rst_config.strip_leading_sequences, ) + start_column = 0 # multi-line rst always start at column 0 of the start mark's next line + # -2 for last line of marker and row_offset is 0-indexed + end_row = start_row + extracted_rst["rst_text"].count(UNIX_NEWLINE) - 1 + end_column = len( + extracted_rst["rst_text"].split(UNIX_NEWLINE)[-2] + ) # last line is only the end marker else: rst_text = extracted_rst["rst_text"] - lineno = src_comment.node.start_point.row + extracted_rst["row_offset"] + 1 + start_column = extracted_rst["start_idx"] + end_row = 
start_row + end_column = extracted_rst["end_idx"] remote_url = self.git_remote_url if self.git_remote_url and self.git_commit_rev: remote_url = utils.form_https_url( @@ -294,16 +303,16 @@ def extract_marked_rst( self.git_commit_rev, self.project_path, filepath, - lineno, + start_row + 1, ) source_map: SourceMap = { "start": { - "row": lineno - 1, - "column": extracted_rst["start_idx"], + "row": start_row, + "column": start_column, }, "end": { - "row": lineno - 1, - "column": extracted_rst["end_idx"], + "row": end_row, + "column": end_column, }, } need_directive: None | NeedDirectiveType | UnexpectedInput = None diff --git a/src/sphinx_codelinks/analyse/utils.py b/src/sphinx_codelinks/analyse/utils.py index b69d35c..e391b6e 100644 --- a/src/sphinx_codelinks/analyse/utils.py +++ b/src/sphinx_codelinks/analyse/utils.py @@ -363,7 +363,7 @@ def extract_rst( if start_idx == -1 or end_idx == -1: return None rst_text = text[start_idx + len(start_marker) : end_idx] - row_offset = len(text[:start_idx].splitlines()) + row_offset = text[:start_idx].count(UNIX_NEWLINE) if not rst_text.strip(): # empty string is out of the interest return None @@ -383,7 +383,8 @@ def extract_rst( rst_text = rst_text[first_newline_idx + len(UNIX_NEWLINE) :] multiline_rst: ExtractedRstType = { "rst_text": rst_text, - "row_offset": row_offset, + "row_offset": row_offset + + 1, # +1 for the rst text starts from the next line of start marker "start_idx": start_idx + len(start_marker) + first_newline_idx diff --git a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json index 4d08346..0719faf 100644 --- a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json +++ b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json @@ -4,12 +4,12 @@ "remote_url": null, "source_map": { "start": { - "row": 3, + "row": 4, "column": 0 }, "end": { "row": 5, - 
"column": 9 + "column": 15 } }, "tagged_scope": "void dummy_func1(){\n //...\n }", @@ -26,11 +26,11 @@ "remote_url": null, "source_map": { "start": { - "row": 13, + "row": 12, "column": 7 }, "end": { - "row": 13, + "row": 12, "column": 41 } }, diff --git a/tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json b/tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json index ee222ba..2a76608 100644 --- a/tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json +++ b/tests/__snapshots__/test_analyse/test_analyse_rst[multiline_rst_marker].anchors.json @@ -8,8 +8,8 @@ "column": 0 }, "end": { - "row": 7, - "column": 14 + "row": 6, + "column": 58 } }, "tagged_scope": "int main() {\n return 0;\n}", diff --git a/tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json b/tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json index aa90b58..6ca41b4 100644 --- a/tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json +++ b/tests/__snapshots__/test_analyse/test_analyse_rst[oneline_rst_marker].anchors.json @@ -4,11 +4,11 @@ "remote_url": null, "source_map": { "start": { - "row": 1, + "row": 0, "column": 7 }, "end": { - "row": 1, + "row": 0, "column": 41 } }, From 3a206f9589dc7b6423d66977d725580ef117e2aa Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 11 Nov 2025 16:59:16 +0100 Subject: [PATCH 22/25] fix rst text positions --- src/sphinx_codelinks/analyse/analyse.py | 27 ++++++++++++++++++++----- src/sphinx_codelinks/analyse/utils.py | 2 ++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index 0215743..5f6b36c 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -286,16 +286,33 @@ def extract_marked_rst( self.analyse_config.marked_rst_config.strip_leading_sequences, 
) start_column = 0 # multi-line rst always start at column 0 of the start mark's next line - # -2 for last line of marker and row_offset is 0-indexed + # -1 for last line of the marker end_row = start_row + extracted_rst["rst_text"].count(UNIX_NEWLINE) - 1 end_column = len( - extracted_rst["rst_text"].split(UNIX_NEWLINE)[-2] - ) # last line is only the end marker + rst_text.splitlines()[(end_row - start_row)] + ) # This is the line before the multiline end marker else: + # single line rst marker + lines = text.splitlines() rst_text = extracted_rst["rst_text"] - start_column = extracted_rst["start_idx"] + column_offset = 0 # offset before the comment start + if src_comment.node.start_point.row == src_comment.node.end_point.row: + # single-line comment + column_offset = src_comment.node.start_point.column + start_column = ( + lines[extracted_rst["row_offset"]].find( + self.analyse_config.marked_rst_config.start_sequence + ) + + len(self.analyse_config.marked_rst_config.start_sequence) + + column_offset + ) # single-line rst start column end_row = start_row - end_column = extracted_rst["end_idx"] + end_column = ( + lines[extracted_rst["row_offset"]].rfind( + self.analyse_config.marked_rst_config.end_sequence + ) + + column_offset + ) # single-line rst end column remote_url = self.git_remote_url if self.git_remote_url and self.git_commit_rev: remote_url = utils.form_https_url( diff --git a/src/sphinx_codelinks/analyse/utils.py b/src/sphinx_codelinks/analyse/utils.py index e391b6e..bbd39b8 100644 --- a/src/sphinx_codelinks/analyse/utils.py +++ b/src/sphinx_codelinks/analyse/utils.py @@ -327,6 +327,8 @@ def remove_leading_sequences(text: str, leading_sequences: list[str]) -> str: class ExtractedRstType(TypedDict): + """RST text extracted from comments and its position info in the context of the comment.""" + rst_text: str row_offset: int start_idx: int From f6498ccfef46b81880d8e8093d72fb509fb81bc0 Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 11 Nov 2025 17:05:32 
+0100 Subject: [PATCH 23/25] update snapshot --- .../test_analyse[src_dir0-src_paths0].anchors.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json index 0719faf..2fb7ba6 100644 --- a/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json +++ b/tests/__snapshots__/test_analyse/test_analyse[src_dir0-src_paths0].anchors.json @@ -27,11 +27,11 @@ "source_map": { "start": { "row": 12, - "column": 7 + "column": 8 }, "end": { "row": 12, - "column": 41 + "column": 42 } }, "tagged_scope": "int main() {\n std::cout << \"Starting demo_1...\" << std::endl;\n dummy_func1();\n std::cout << \"Demo_1 finished.\" << std::endl;\n return 0;\n }", From f852ae297a0ccede4849245ccfef7d5679887c7c Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 11 Nov 2025 17:16:38 +0100 Subject: [PATCH 24/25] fix column position --- src/sphinx_codelinks/analyse/analyse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sphinx_codelinks/analyse/analyse.py b/src/sphinx_codelinks/analyse/analyse.py index 5f6b36c..c564dad 100644 --- a/src/sphinx_codelinks/analyse/analyse.py +++ b/src/sphinx_codelinks/analyse/analyse.py @@ -289,7 +289,7 @@ def extract_marked_rst( # -1 for last line of the marker end_row = start_row + extracted_rst["rst_text"].count(UNIX_NEWLINE) - 1 end_column = len( - rst_text.splitlines()[(end_row - start_row)] + extracted_rst["rst_text"].splitlines()[(end_row - start_row)] ) # This is the line before the multiline end marker else: # single line rst marker From 31caef2303904d28d6f9f26e54ab7318f0aac47f Mon Sep 17 00:00:00 2001 From: juiwenchen Date: Tue, 11 Nov 2025 17:28:14 +0100 Subject: [PATCH 25/25] update TC --- ...[leading_asterisk_rst_marker].anchors.json | 26 ++++++++++ ...leading_asterisks_rst_marker].anchors.json | 26 ++++++++++ 
..._rst[link_options_rst_marker].anchors.json | 29 +++++++++++ tests/fixture_files/analyse_rst.yml | 48 +++++++++++++++++++ tests/test_analyse.py | 8 ++++ 5 files changed, 137 insertions(+) create mode 100644 tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisk_rst_marker].anchors.json create mode 100644 tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisks_rst_marker].anchors.json create mode 100644 tests/__snapshots__/test_analyse/test_analyse_rst[link_options_rst_marker].anchors.json diff --git a/tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisk_rst_marker].anchors.json b/tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisk_rst_marker].anchors.json new file mode 100644 index 0000000..65f42b4 --- /dev/null +++ b/tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisk_rst_marker].anchors.json @@ -0,0 +1,26 @@ +[ + { + "filepath": "dummy_1.c", + "remote_url": null, + "source_map": { + "start": { + "row": 2, + "column": 0 + }, + "end": { + "row": 6, + "column": 59 + } + }, + "tagged_scope": "int main() {\n return 0;\n}", + "rst": " .. 
impl:: implement main function\n :id: REQ_001\n :status: test\n\n This is content for the main function implementation.\n ", + "need": { + "type": "impl", + "title": "implement main function", + "content": "This is content for the main function implementation.", + "id": "REQ_001", + "status": "test" + }, + "type": "rst" + } +] \ No newline at end of file diff --git a/tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisks_rst_marker].anchors.json b/tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisks_rst_marker].anchors.json new file mode 100644 index 0000000..449c661 --- /dev/null +++ b/tests/__snapshots__/test_analyse/test_analyse_rst[leading_asterisks_rst_marker].anchors.json @@ -0,0 +1,26 @@ +[ + { + "filepath": "dummy_1.c", + "remote_url": null, + "source_map": { + "start": { + "row": 2, + "column": 0 + }, + "end": { + "row": 6, + "column": 60 + } + }, + "tagged_scope": "int main() {\n return 0;\n}", + "rst": " .. impl:: implement main function\n :id: REQ_001\n :status: test\n\n This is content for the main function implementation.\n ", + "need": { + "type": "impl", + "title": "implement main function", + "content": "This is content for the main function implementation.", + "id": "REQ_001", + "status": "test" + }, + "type": "rst" + } +] \ No newline at end of file diff --git a/tests/__snapshots__/test_analyse/test_analyse_rst[link_options_rst_marker].anchors.json b/tests/__snapshots__/test_analyse/test_analyse_rst[link_options_rst_marker].anchors.json new file mode 100644 index 0000000..c06ea92 --- /dev/null +++ b/tests/__snapshots__/test_analyse/test_analyse_rst[link_options_rst_marker].anchors.json @@ -0,0 +1,29 @@ +[ + { + "filepath": "dummy_1.c", + "remote_url": null, + "source_map": { + "start": { + "row": 2, + "column": 0 + }, + "end": { + "row": 6, + "column": 59 + } + }, + "tagged_scope": "int main() {\n return 0;\n}", + "rst": " .. 
impl:: implement main function\n :id: REQ_001\n :links: IMPL_001, IMPL_002\n\n This is content for the main function implementation.\n ", + "need": { + "type": "impl", + "title": "implement main function", + "content": "This is content for the main function implementation.", + "id": "REQ_001", + "links": [ + "IMPL_001", + " IMPL_002" + ] + }, + "type": "rst" + } +] \ No newline at end of file diff --git a/tests/fixture_files/analyse_rst.yml b/tests/fixture_files/analyse_rst.yml index d05c4e7..1184610 100644 --- a/tests/fixture_files/analyse_rst.yml +++ b/tests/fixture_files/analyse_rst.yml @@ -19,3 +19,51 @@ multiline_rst_marker: int main() { return 0; } + +leading_asterisk_rst_marker: + dummy_1.c: | + /* + * @rst + * .. impl:: implement main function + * :id: REQ_001 + * :status: test + * + * This is content for the main function implementation. + * @endrst + */ + int main() { + return 0; + } + +leading_asterisks_rst_marker: + marked_rst_config: + strip_leading_sequences: + - '**' + dummy_1.c: | + /** + ** @rst + ** .. impl:: implement main function + ** :id: REQ_001 + ** :status: test + ** + ** This is content for the main function implementation. + ** @endrst + **/ + int main() { + return 0; + } + +link_options_rst_marker: + dummy_1.c: | + /* + * @rst + * .. impl:: implement main function + * :id: REQ_001 + * :links: IMPL_001, IMPL_002 + * + * This is content for the main function implementation. 
+ * @endrst + */ + int main() { + return 0; + } diff --git a/tests/test_analyse.py b/tests/test_analyse.py index ef11fc8..2210a55 100644 --- a/tests/test_analyse.py +++ b/tests/test_analyse.py @@ -148,6 +148,14 @@ def test_analyse_rst( get_rst=True, ) + if "marked_rst_config" in content: + src_analyse_config.marked_rst_config.strip_leading_sequences = content[ + "marked_rst_config" + ].get( + "strip_leading_sequences", + src_analyse_config.marked_rst_config.strip_leading_sequences, + ) + src_analyse = SourceAnalyse(src_analyse_config) src_analyse.run() src_analyse.dump_marked_content(tmp_path)