Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 84 additions & 9 deletions strictdoc/core/file_traceability_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from strictdoc.helpers.cast import assert_cast
from strictdoc.helpers.exception import StrictDocException
from strictdoc.helpers.google_test import convert_function_name_to_gtest_macro
from strictdoc.helpers.mid import MID
from strictdoc.helpers.ordered_set import OrderedSet

if TYPE_CHECKING:
Expand Down Expand Up @@ -601,15 +602,36 @@ def validate_and_resolve(
continue

assert source_node_.entity_name is not None
sdoc_node = None
sdoc_node_uid = source_node_.get_sdoc_field(
"UID", relevant_source_node_entry
)
if sdoc_node_uid is None:
sdoc_node_uid = f"{document_uid}/{path_to_source_file_}/{source_node_.entity_name}"
sdoc_node = traceability_index.get_node_by_uid_weak(
sdoc_node_uid
mid = source_node_.get_sdoc_field(
"MID", relevant_source_node_entry
)

# First merge criterion: Merge if SDoc node with same MID exists.
if mid is not None:
sdoc_node_mid = MID(mid)
merge_candidate_sdoc_node = (
traceability_index.get_node_by_mid_weak(sdoc_node_mid)
)
if isinstance(merge_candidate_sdoc_node, SDocNode):
sdoc_node = merge_candidate_sdoc_node
sdoc_node_uid = sdoc_node.reserved_uid

if sdoc_node is None:
# If no UID from source code field or merge-by-MID, create UID by conventional scheme.
if sdoc_node_uid is None:
sdoc_node_uid = f"{document_uid}/{path_to_source_file_}/{source_node_.entity_name}"
# Second merge criterion: Merge if SDoc node with same UID exists.
tmp_sdoc_node = traceability_index.get_node_by_uid_weak(
sdoc_node_uid
)
if isinstance(tmp_sdoc_node, SDocNode):
sdoc_node = tmp_sdoc_node

assert sdoc_node_uid is not None
if sdoc_node is not None:
sdoc_node = assert_cast(sdoc_node, SDocNode)
self.merge_sdoc_node_with_source_node(
Expand All @@ -626,11 +648,6 @@ def validate_and_resolve(
document,
)
sdoc_node_uid = assert_cast(sdoc_node.reserved_uid, str)
traceability_index.graph_database.create_link(
link_type=GraphLinkType.UID_TO_NODE,
lhs_node=sdoc_node_uid,
rhs_node=sdoc_node,
)
if current_top_node is None:
current_top_node = (
FileTraceabilityIndex.create_source_node_section(
Expand Down Expand Up @@ -998,6 +1015,25 @@ def merge_sdoc_node_with_source_node(
)
# Merge strategy: overwrite any field if there's a field with same name from custom tags.
sdoc_node_fields = source_node.get_sdoc_fields(source_node_config_entry)

# Sanity check: Neither UID nor MID may conflict (an early auto-assigned MID is allowed to be overwritten).
if (
"MID" in sdoc_node.ordered_fields_lookup
and "MID" in sdoc_node_fields
):
sdoc_mid_field = sdoc_node.get_field_by_name("MID").get_text_value()
if sdoc_mid_field != sdoc_node_fields["MID"]:
raise StrictDocException(
f"Can't merge node by UID {sdoc_node.reserved_uid}: "
f"Conflicting MID: {sdoc_mid_field} != {sdoc_node_fields['MID']}"
)
if sdoc_node.reserved_uid is not None and "UID" in sdoc_node_fields:
if sdoc_node.reserved_uid != sdoc_node_fields["UID"]:
raise StrictDocException(
f"Can't merge node by MID {sdoc_node.reserved_mid}: "
f"Conflicting UID: {sdoc_node.reserved_uid} != {sdoc_node_fields['UID']}"
)

FileTraceabilityIndex.set_sdoc_node_fields(sdoc_node, sdoc_node_fields)

@staticmethod
Expand Down Expand Up @@ -1081,6 +1117,45 @@ def connect_source_node_requirements(

Here we link REQ and sdoc_node bidirectional.
"""
if (
sdoc_node.reserved_uid is not None
and not traceability_index.graph_database.has_link(
link_type=GraphLinkType.UID_TO_NODE,
lhs_node=sdoc_node.reserved_uid,
rhs_node=sdoc_node,
)
):
traceability_index.graph_database.create_link(
link_type=GraphLinkType.UID_TO_NODE,
lhs_node=sdoc_node.reserved_uid,
rhs_node=sdoc_node,
)

# A merge procedure may have overwritten the MID,
# in which case the graph database and the search index need an update.
if "MID" in sdoc_node.ordered_fields_lookup != sdoc_node.reserved_mid:
sdoc_mid_field = sdoc_node.get_field_by_name("MID").get_text_value()
if sdoc_mid_field != sdoc_node.reserved_mid:
# TODO:
# If we really want to support changing the auto-assigned MID,
# at least the graph database and the document search index need an update (remove old MID, add new MID).
# I currently struggle to update the search index.
Copy link
Contributor Author

@haxtibal haxtibal Nov 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I've cleaned things up a bit now. Here is my remaining problem. I am trying to allow and handle this case:

# sdoc
[SRC_NODE]
UID: SREQ-1

# example.c
/*
 * UID: SREQ-1
 * MID: 12345678
 */
example_1() { }

meaning a MID from source code would overwrite the earlier auto-assigned MID. However, the auto-assigned MID has already been entered into the graph and the search index early on (at least there; maybe also in other places?), and I would need to update them. I know how to update the graph, but couldn't figure out how to consistently update the search index.

What do you think, would it be easy enough to do these updates, or should I simply not allow that edge case (exit with error)?

EDIT: Personally, I tend towards not allowing it. That edge case is not needed for the Linux showcase. I don't like the idea of having to modify already established graph connections while we're still in the traceability construction phase. Rather, it should be possible to conceptually separate things into a "compile" and a "link" phase, as you already suggested. Source node parsing and merging would be part of the compile phase. At its end we would know all nodes along with some "I would like to link to..." information, but nothing would actually be linked yet. Only the final link stage would add links to the graph DB and create the search index.

Copy link
Collaborator

@stanislaw stanislaw Nov 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need to think about your comment but something that is not clear to me even before and in general is how we want to auto-generate the UUID for both source code and sidecar when both or either of SDoc node's or source file's MID/UUID do not exist yet. Is my understanding correct that we will not have the human-readable UID at all in the Linux context?

# sdoc
[SRC_NODE]
 * Has no MID or UID, or MID only but the source node may not have it initially?

# example.c
/*
 * Has no MID or UID, or MID only but the SDoc document node may not have it initially?
 */
example_1() { }

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In other words, it is some sort of chicken-and-egg problem. How are we imagining the workflow of auto-generating MID/UUID between source code and sidecars?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

something that is not clear to me even before and in general is how we want to auto-generate the UUID for both source code and sidecar when both or either of SDoc node or source file do not exist yet

We can look at ELISA's trace_events.c annotations and their idgen.py script.

Source code starts off like

/*
 * SPDX-Req-ID: [TODO: automatically generate it]
 * ...
 */

Then one shall call idgen.py generate trace_events.c to calculate sha256sum("linux" + "trace_events.c" + instance + code), where instance is the text after SPDX-Text: in the comment, and code is the full C-function definition without comment.

The script acknowledges the problem you have mentioned

# TODO: since sidecar is not yet defined, this script doesn't consider the sidecar added content to the instance.

I see a few options:

  1. For initial uuid generation, only hash over content in source code but neglect the sdoc part (that's what idgen.py currently does). Copying the generated UUID to sdoc is a manual step. Only the second run will have the nodes merged.
  2. Start off with SPDX-Req-ID: UUID-TICKET-123, and MID: UUID-TICKET-123 in related sdoc. When StrictDoc sees such a preliminary UID, it will replace it with a proper calculated hash value
  3. Start off with SPDX-Req-ID: [TODO: automatically generate it], and MID: tracing.c/__ftrace_event_enable_disable in related sdoc. Let StrictDoc merge by conventional MID and replace conventional UID with proper calculated hash value
  4. Use UID (manually assigned) + MID, and merge by UID.

I have no clear favorite among those options right now. Maybe we should ask Gabriele?

Is my understanding correct that we will not have the UID at all in the Linux context?

Yes, that's also my understanding. The pilot work nowhere mentions a UID. If we wanted one, it's up to us to propose it.

parent_document = sdoc_node.get_parent_or_including_document()
sdoc_node.reserved_mid = MID(sdoc_mid_field)
if parent_document.config.enable_mid:
sdoc_node.mid_permanent = True

if not traceability_index.graph_database.has_link(
link_type=GraphLinkType.MID_TO_NODE,
lhs_node=sdoc_node.reserved_mid,
rhs_node=sdoc_node,
):
traceability_index.graph_database.create_link(
link_type=GraphLinkType.MID_TO_NODE,
lhs_node=sdoc_node.reserved_mid,
rhs_node=sdoc_node,
)

for marker_ in source_node.markers:
if not isinstance(marker_, FunctionRangeMarker):
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ ELEMENTS:
TITLE: Merge example.c into static nodes

[REQUIREMENT]
UID: SRC-NODES-BASE/src/example/example.c/example_1
UID: SRC-NODES-BASE/src/example.c/example_1
TITLE: TITLE from sdoc
FOO: FOO text from sdoc
BAR: BAR text from sdoc
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# This test verifies that a source node is merged with a static SDoc node if
# - the source node is not marked up with a UID field (i.e., the default UID is effective), and
# - the static SDoc node was explicitly given the default UID.
#
# @relation(SDOC-SRS-141, scope=file)
#

RUN: %strictdoc --debug export %S --output-dir %T | filecheck %s

CHECK: Published: Hello world doc

RUN: %check_exists --file "%T/html/_source_files/src/example.c.html"

RUN: %cat %T/html/%THIS_TEST_FOLDER/source_node_base.html | filecheck %s --check-prefix CHECK-HTML
CHECK-HTML: Requirements from Source Nodes
CHECK-HTML: SRC-NODES-BASE/src/example.c/example_1
CHECK-HTML: TITLE from sdoc
CHECK-HTML: class="requirement__link-parent" href="../30_merge_with_sdoc_by_default_uid/parent.html#REQ-1"
CHECK-HTML: src/example.c, <i>lines: 3-14</i>, function example_1()
CHECK-HTML-NOT: FOO text from sdoc
CHECK-HTML: FOO text from example.c
CHECK-HTML-NOT: BAR text from sdoc
CHECK-HTML: BAR text from example.c

RUN: %cat %T/html/_source_files/src/example.c.html | filecheck %s --check-prefix CHECK-SOURCE-FILE
CHECK-SOURCE-FILE: SRC-NODES-BASE/src/example.c/example_1

RUN: %cat %T/html/source_coverage.html | filecheck %s --check-prefix CHECK-SOURCE-COVERAGE
CHECK-SOURCE-COVERAGE: 100.0

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[DOCUMENT]
TITLE: Hello world doc

[REQUIREMENT]
UID: REQ-1
TITLE: Requirement Title
STATEMENT: Requirement Statement

[REQUIREMENT]
UID: REQ-2
TITLE: Requirement Title #2
STATEMENT: Requirement Statement #2

[REQUIREMENT]
UID: REQ-3
TITLE: Requirement Title #3
STATEMENT: Requirement Statement #3
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
[DOCUMENT]
MID: c2d4542d5f1741c88dfcb4f68ad7dcbd
TITLE: Requirements from Source Nodes
UID: SRC-NODES-BASE

[GRAMMAR]
ELEMENTS:
- TAG: SECTION
PROPERTIES:
IS_COMPOSITE: True
FIELDS:
- TITLE: UID
TYPE: String
REQUIRED: False
- TITLE: TITLE
TYPE: String
REQUIRED: True
- TAG: REQUIREMENT
PROPERTIES:
VIEW_STYLE: Narrative
FIELDS:
- TITLE: UID
TYPE: String
REQUIRED: False
- TITLE: MID
TYPE: String
REQUIRED: False
- TITLE: TITLE
TYPE: String
REQUIRED: False
- TITLE: FOO
TYPE: String
REQUIRED: False
- TITLE: BAR
TYPE: String
REQUIRED: False
RELATIONS:
- TYPE: Parent
- TYPE: File

[[SECTION]]
TITLE: Merge example.c into static nodes

[REQUIREMENT]
UID: REQ-SOURCE-1
TITLE: TITLE1 from sdoc
FOO: FOO1 text from sdoc
BAR: BAR1 text from sdoc
RELATIONS:
- TYPE: Parent
VALUE: REQ-1

[REQUIREMENT]
UID: REQ-SOURCE-2
MID: 80cd685d-0e18-44b8-9842-c1863a2eb9ec
TITLE: TITLE2 from sdoc
FOO: FOO2 text from sdoc
BAR: BAR2 text from sdoc
RELATIONS:
- TYPE: Parent
VALUE: REQ-2

[REQUIREMENT]
UID: REQ-SOURCE-3
TITLE: TITLE3 from sdoc
FOO: FOO3 text from sdoc
BAR: BAR3 text from sdoc
RELATIONS:
- TYPE: Parent
VALUE: REQ-3

[[/SECTION]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include <stdio.h>

/**
* Some text.
*
* @relation(REQ-1, scope=function)
*
* UID: REQ-SOURCE-1
*
* FOO: FOO1 text from example.c
*
* BAR: BAR1 text from example.c
*/
void example_1(void) {
print("hello world\n");
}

/**
* Some text.
*
* @relation(REQ-2, scope=function)
*
* UID: REQ-SOURCE-2
*
* FOO: FOO2 text from example.c
*
* BAR: BAR2 text from example.c
*/
void example_2(void) {
print("hello world\n");
}

/**
* Some text.
*
* @relation(REQ-3, scope=function)
*
* UID: REQ-SOURCE-3
*
* MID: 1973a567-a109-491d-b7f0-6bb22eafa6ab
*
* FOO: FOO3 text from example.c
*
* BAR: BAR3 text from example.c
*/
void example_3(void) {
print("hello world\n");
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[project]

features = [
"REQUIREMENT_TO_SOURCE_TRACEABILITY",
"SOURCE_FILE_LANGUAGE_PARSERS",
]

source_nodes = [
{ "src/" = { uid = "SRC-NODES-BASE", node_type = "REQUIREMENT" } }
]

exclude_source_paths = [
"test.itest"
]
Loading
Loading