Skip to content

Commit 5a12398

Browse files
✨ Licence compliance check (#45)
* creating a way to identify equivalent strings * Added unit tests * modified spdx report to use licence lookup * modified configuration to list accepted licences * checking licence compliance of a package and its dependencies * copyright and news file * making code climate happy * making code climate even more happy and hopefully for good * formatting improvement * try forcing publishing artifacts even if check fails * Update mbed_tools_ci_scripts/utils/third_party_licences.py Co-Authored-By: Rob Walton <rob.walton@arm.com> * changes following review comments * Updated the PR to handle more licence descriptor cases. * Adding a way to use a regex to define allowed licences e.g. `BSD*` * licence expression understanding improvement * fixing problems with comma in the licence description * fix regex * fix test * Better handling of licences, and put in place a way to mark packages as checked with regard to licensing * listing the packages for which the licence has been checked Co-authored-by: Rob Walton <rob.walton@arm.com>
1 parent 62f0e4f commit 5a12398

19 files changed

+1152
-53
lines changed

azure-pipelines/build-release.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ stages:
153153
- publish: $(temp_spdx_reports_path)
154154
artifact: SPDX
155155
displayName: 'Publish SPDX reports'
156+
condition: always()
156157

157158
# Collect test and build stages together before the release stages to provide a pass/fail point for the status badge.
158159
- stage: CiCheckpoint

mbed_tools_ci_scripts/report_third_party_ip.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@
1616
import argparse
1717
import logging
1818
from pathlib import Path
19-
19+
from typing import Any
2020
from mbed_tools_ci_scripts.spdx_report.spdx_project import SpdxProject
2121
from mbed_tools_ci_scripts.utils.logging import set_log_level, log_exception
2222
from mbed_tools_ci_scripts.utils.package_helpers import CurrentProjectMetadataParser, generate_package_info
2323

2424
logger = logging.getLogger(__name__)
2525

2626

27-
def generate_spdx_reports(output_directory: Path) -> None:
27+
def generate_spdx_reports(output_directory: Path) -> SpdxProject:
2828
"""Generates all the SPDX reports for the current project."""
2929
logger.info("Generating package information.")
3030
try:
@@ -34,14 +34,21 @@ def generate_spdx_reports(output_directory: Path) -> None:
3434
log_exception(logger, e)
3535

3636
logger.info("Generating SPDX report.")
37-
SpdxProject(CurrentProjectMetadataParser()).generate_tag_value_files(output_directory)
37+
project = SpdxProject(CurrentProjectMetadataParser())
38+
project.generate_tag_value_files(output_directory)
39+
return project
3840

3941

4042
def main() -> int:
4143
"""Script CLI."""
4244
parser = argparse.ArgumentParser(description="Generate licence and third-party IP reports.")
45+
46+
def convert_to_path(arg: Any) -> Path:
47+
"""Converts argument to a path."""
48+
return Path(arg)
49+
4350
parser.add_argument(
44-
"-o", "--output-dir", help="Output directory where the files are generated", required=True,
51+
"-o", "--output-dir", help="Output directory where the files are generated", required=True, type=convert_to_path
4552
)
4653

4754
parser.add_argument(
@@ -51,7 +58,8 @@ def main() -> int:
5158
set_log_level(args.verbose)
5259

5360
try:
54-
generate_spdx_reports(Path(args.output_dir))
61+
project = generate_spdx_reports(args.output_dir)
62+
project.check_licence_compliance()
5563
return 0
5664
except Exception as e:
5765
log_exception(logger, e)

mbed_tools_ci_scripts/spdx_report/spdx_document.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,11 @@ def external_refs(self) -> List[DependencySpdxDocumentRef]:
177177
"""
178178
return self._other_document_references
179179

180+
@external_refs.setter
181+
def external_refs(self, external_refs: List[DependencySpdxDocumentRef]) -> None:
182+
"""Sets the document external references."""
183+
self._other_document_references = external_refs
184+
180185
def generate_spdx_package(self) -> SpdxPackage:
181186
"""Generates the SPDX package for this package.
182187

mbed_tools_ci_scripts/spdx_report/spdx_file.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
)
1818
from mbed_tools_ci_scripts.utils.definitions import UNKNOWN
1919
from mbed_tools_ci_scripts.utils.hash_helpers import generate_uuid_based_on_str, determine_sha1_hash_of_file
20+
from mbed_tools_ci_scripts.utils.third_party_licences import cleanse_licence_expression
2021

2122

2223
class SpdxFile:
@@ -88,7 +89,7 @@ def licence(self) -> str:
8889
file's licence
8990
"""
9091
file_licence = determine_file_licence(self.path)
91-
return file_licence if file_licence else self._package_licence
92+
return cleanse_licence_expression(file_licence) if file_licence else self._package_licence
9293

9394
@property
9495
def copyright(self) -> Optional[str]:

mbed_tools_ci_scripts/spdx_report/spdx_helpers.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717

1818
import logging
1919
import toml
20-
from license_expression import Licensing
2120
from pathlib import Path
2221
from spdx.utils import SPDXNone, UnKnown
23-
from typing import Union, Optional, Iterator, List
22+
from typing import Union, Optional, Iterator, Iterable, Any
2423

25-
from mbed_tools_ci_scripts.utils.filesystem_helpers import scan_file_for_pattern, should_exclude_path, list_all_files
24+
from mbed_tools_ci_scripts.utils.configuration import ConfigurationVariable, configuration
2625
from mbed_tools_ci_scripts.utils.definitions import UNKNOWN
26+
from mbed_tools_ci_scripts.utils.filesystem_helpers import scan_file_for_pattern, should_exclude_path, list_all_files
27+
from mbed_tools_ci_scripts.utils.third_party_licences import simplify_licence_expression
2728

2829
logger = logging.getLogger(__name__)
2930

@@ -56,7 +57,7 @@ def determine_file_licence(path: Path) -> Optional[str]:
5657
if not match:
5758
return None
5859
licence = match.group(1).strip()
59-
return str(Licensing().parse(licence).simplify())
60+
return simplify_licence_expression(licence)
6061
except Exception as e:
6162
logger.error(f"Could not determine the licence of file [{path}] from identifier '{licence}'. Reason: {e}.")
6263
return None
@@ -105,7 +106,21 @@ def ignore_path(p: Path) -> bool:
105106
return list_all_files(project_root, ignore_path)
106107

107108

108-
def determine_licence_compound(main_licence: str, additional_licences: List[str]) -> str:
109-
"""Determines the overall licence based on main licence and additional licences."""
110-
overall_licence = f"({main_licence}) AND ({') AND ('.join(additional_licences)})"
111-
return str(Licensing().parse(overall_licence).simplify())
109+
def determine_checked_packages_from_string(checked_packages: Any) -> Iterable[Any]:
110+
"""Determines the list of packages for which the licence has been checked."""
111+
if isinstance(checked_packages, str):
112+
checked_packages = checked_packages.split(", ")
113+
if isinstance(checked_packages, (list, dict, tuple, set)):
114+
yield from checked_packages
115+
116+
117+
def get_packages_with_checked_licence() -> Iterable[str]:
118+
"""Determines the list of packages for which the licence has been checked from configuration."""
119+
yield from determine_checked_packages_from_string(
120+
configuration.get_value(ConfigurationVariable.PACKAGES_WITH_CHECKED_LICENCE)
121+
)
122+
123+
124+
def is_package_licence_checked(package_name: str) -> bool:
125+
"""States whether the licence of a package has been checked and hence, that its licence is compliant."""
126+
return package_name.strip() in get_packages_with_checked_licence()

mbed_tools_ci_scripts/spdx_report/spdx_package.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#
55
"""Definition of an SPDX Package."""
66

7+
from dataclasses import dataclass
78
from pathlib import Path
89
from spdx.checksum import Algorithm
910
from spdx.creationinfo import Person
@@ -16,11 +17,15 @@
1617
from mbed_tools_ci_scripts.spdx_report.spdx_helpers import (
1718
determine_spdx_value,
1819
list_project_files_for_licensing,
19-
determine_licence_compound,
2020
)
2121
from mbed_tools_ci_scripts.utils.definitions import UNKNOWN
2222
from mbed_tools_ci_scripts.utils.package_helpers import PackageMetadata
23-
from dataclasses import dataclass
23+
from mbed_tools_ci_scripts.utils.third_party_licences import (
24+
UNKNOWN_LICENCE,
25+
cleanse_licence_expression,
26+
is_licence_accepted,
27+
determine_licence_compound,
28+
)
2429

2530

2631
@dataclass(frozen=True, order=True)
@@ -58,6 +63,7 @@ def __init__(self, package_info: PackageInfo, is_dependency: bool = False,) -> N
5863
self._package_info = package_info
5964
self._file_list: Optional[List[Path]] = None
6065
self._actual_licence: Optional[str] = None
66+
self._main_licence: Optional[str] = None
6167

6268
@property
6369
def files(self) -> Optional[List[Path]]:
@@ -108,7 +114,17 @@ def main_licence(self) -> str:
108114
Returns:
109115
project's licence
110116
"""
111-
return self._package_info.metadata.licence
117+
if not self._main_licence:
118+
package_licence = self._package_info.metadata.licence
119+
self._main_licence = (
120+
cleanse_licence_expression(package_licence) if package_licence else UNKNOWN_LICENCE.identifier
121+
)
122+
return self._main_licence
123+
124+
@property
125+
def is_main_licence_accepted(self) -> bool:
126+
"""States whether the main licence of the package is part of the accepted licence list."""
127+
return is_licence_accepted(self.main_licence)
112128

113129
@property
114130
def licence(self) -> str:
@@ -126,6 +142,11 @@ def licence(self) -> str:
126142
)
127143
return self._actual_licence
128144

145+
@property
146+
def is_licence_accepted(self) -> bool:
147+
"""States whether the actual package's licence of the package is part of the accepted licence list."""
148+
return is_licence_accepted(self.licence)
149+
129150
@property
130151
def author(self) -> str:
131152
"""Gets the document's author.
@@ -168,7 +189,7 @@ def get_spdx_files(self) -> Optional[List[SpdxFile]]:
168189
Returns:
169190
list of file descriptions or None if a dependency.
170191
"""
171-
if not self.files or len(self.files) == 0:
192+
if not self.files:
172193
return None
173194
return [SpdxFile(p, self._package_info.root_dir, self.main_licence) for p in self.files]
174195

@@ -197,21 +218,22 @@ def generate_spdx_package(self) -> Package:
197218
Returns:
198219
the corresponding package
199220
"""
200-
package = Package()
221+
package = Package(
222+
name=determine_spdx_value(self.name),
223+
spdx_id=f"SPDXRef-{self.id}",
224+
download_location=determine_spdx_value(None),
225+
version=determine_spdx_value(self.version),
226+
file_name=determine_spdx_value(self.name),
227+
supplier=None,
228+
originator=Person(determine_spdx_value(self.author), determine_spdx_value(self.author_email)),
229+
)
201230
package.check_sum = Algorithm("SHA1", str(NoAssert()))
202231
package.cr_text = NoAssert()
203-
package.name = determine_spdx_value(self.name)
204-
package.version = determine_spdx_value(self.version)
205-
package.file_name = determine_spdx_value(self.name)
206-
package.download_location = determine_spdx_value(None)
207232
package.homepage = determine_spdx_value(self.url)
208-
package.originator = Person(determine_spdx_value(self.author), determine_spdx_value(self.author_email))
209233
package.license_declared = License.from_identifier(str(determine_spdx_value(self.main_licence)))
210234
package.conc_lics = License.from_identifier(str(determine_spdx_value(self.licence)))
211235
package.summary = determine_spdx_value(self.description)
212236
package.description = NoAssert()
213-
package.spdx_id = f"SPDXRef-{self.id}"
214-
215237
files = self.get_spdx_files()
216238
if files:
217239
package.files_analyzed = True
@@ -223,7 +245,7 @@ def generate_spdx_package(self) -> Package:
223245
else:
224246
# Has to generate a dummy file because of the following rule in SDK:
225247
# - Package must have at least one file
226-
dummy_file = SpdxFile(Path(UNKNOWN), self._package_info.root_dir, self.licence)
248+
dummy_file = SpdxFile(Path(UNKNOWN), self._package_info.root_dir, self.main_licence)
227249
package.verif_code = NoAssert()
228250
package.add_file(dummy_file.generate_spdx_file())
229251
package.add_lics_from_file(License.from_identifier(str(determine_spdx_value(dummy_file.licence))))

mbed_tools_ci_scripts/spdx_report/spdx_project.py

Lines changed: 74 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@
55
"""Definition of an SPDX report for a Python project."""
66

77
from pathlib import Path
8+
import os
89
from spdx.writers.tagvalue import write_document
10+
from typing import Optional, List, cast, Tuple, Dict
911

1012
from mbed_tools_ci_scripts.spdx_report.spdx_dependency import DependencySpdxDocumentRef
1113
from mbed_tools_ci_scripts.spdx_report.spdx_document import SpdxDocument
1214
from mbed_tools_ci_scripts.utils.hash_helpers import determine_sha1_hash_of_file
1315
from mbed_tools_ci_scripts.utils.package_helpers import ProjectMetadataParser
16+
from mbed_tools_ci_scripts.spdx_report.spdx_helpers import is_package_licence_checked
1417

1518

1619
class SpdxProject:
@@ -23,6 +26,30 @@ class SpdxProject:
2326
def __init__(self, parser: ProjectMetadataParser) -> None:
2427
"""Constructor."""
2528
self._parser = parser
29+
self._main_document: Optional[SpdxDocument] = None
30+
self._dependency_documents: Optional[List[SpdxDocument]] = None
31+
32+
def _generate_documents(self) -> None:
33+
if self._main_document:
34+
return
35+
self._dependency_documents = list()
36+
project_metadata = self._parser.project_metadata
37+
dependencies = project_metadata.dependencies_metadata
38+
for dependency in dependencies:
39+
self._dependency_documents.append(SpdxDocument(dependency, is_dependency=True))
40+
self._main_document = SpdxDocument(package_metadata=project_metadata.project_metadata)
41+
42+
@property
43+
def main_document(self) -> SpdxDocument:
44+
"""Gets project's main SPDX document."""
45+
self._generate_documents()
46+
return cast(SpdxDocument, self._main_document)
47+
48+
@property
49+
def dependency_documents(self) -> List[SpdxDocument]:
50+
"""Gets the list of project's dependencies SPDX documents."""
51+
self._generate_documents()
52+
return self._dependency_documents if self._dependency_documents else list()
2653

2754
@staticmethod
2855
def generate_tag_value_file(dir: Path, spdx_doc: SpdxDocument, filename: str = "LICENSE.spdx") -> str:
@@ -63,19 +90,58 @@ def generate_tag_value_files(self, dir: Path) -> None:
6390
if not dir.is_dir():
6491
raise NotADirectoryError(str(dir))
6592

66-
project_metadata = self._parser.project_metadata
67-
externalRefs = []
68-
dependencies = project_metadata.dependencies_metadata
69-
for dependency in dependencies:
70-
spdx_dependency = SpdxDocument(dependency, is_dependency=True)
93+
externalRefs = list()
94+
for spdx_dependency in self.dependency_documents:
7195
file_name = f"{spdx_dependency.name}.spdx"
7296
checksum = SpdxProject.generate_tag_value_file(dir, spdx_dependency, file_name)
7397
externalRefs.append(
7498
DependencySpdxDocumentRef(
7599
name=spdx_dependency.document_name, namespace=spdx_dependency.document_namespace, checksum=checksum,
76100
)
77101
)
78-
main_document = SpdxDocument(
79-
package_metadata=project_metadata.project_metadata, other_document_refs=externalRefs,
102+
self.main_document.external_refs = externalRefs
103+
SpdxProject.generate_tag_value_file(dir, self.main_document, f"{self.main_document.name}.spdx")
104+
105+
@staticmethod
106+
def _check_package_licence(package_document: SpdxDocument) -> Tuple[bool, bool, str, str, str]:
107+
package = package_document.generate_spdx_package()
108+
return (
109+
package.is_main_licence_accepted,
110+
package.is_licence_accepted,
111+
package.name,
112+
package.main_licence,
113+
package.licence,
80114
)
81-
SpdxProject.generate_tag_value_file(dir, main_document, f"{main_document.name}.spdx")
115+
116+
def _report_issues(self, issues: Dict[str, str]) -> None:
117+
if issues:
118+
raise ValueError(
119+
f",{os.linesep}".join(
120+
[
121+
f"Package [{package_name}] has a non-compliant licence ({package_licence}) for this project"
122+
for package_name, package_licence in issues.items()
123+
]
124+
)
125+
)
126+
127+
def _check_one_licence_compliance(self, spdx_document: SpdxDocument, issues: Dict[str, str]) -> None:
128+
main_valid, actual_valid, name, main_licence, actual_licence = SpdxProject._check_package_licence(spdx_document)
129+
if not ((main_valid and actual_valid) or is_package_licence_checked(name)):
130+
issues[name] = actual_licence if main_valid else main_licence
131+
132+
def _check_package_dependencies_licence_compliance(self, issues: Dict[str, str]) -> None:
133+
for dependency in self.dependency_documents:
134+
self._check_one_licence_compliance(dependency, issues)
135+
136+
def _check_package_licence_compliance(self, issues: Dict[str, str]) -> None:
137+
self._check_one_licence_compliance(self.main_document, issues)
138+
139+
def check_licence_compliance(self) -> None:
140+
"""Checks whether the licences of the package as well as all its dependencies are compliant.
141+
142+
By compliant, it is meant that all the licences are in the list of accepted licences set for the given project.
143+
"""
144+
issues: Dict[str, str] = dict()
145+
self._check_package_licence_compliance(issues)
146+
self._check_package_dependencies_licence_compliance(issues)
147+
self._report_issues(issues)

mbed_tools_ci_scripts/utils/configuration.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class ConfigurationVariable(enum.Enum):
4646
IGNORE_PYPI_TEST_UPLOAD = 25
4747
FILE_LICENCE_IDENTIFIER = 26
4848
COPYRIGHT_START_DATE = 27
49+
ACCEPTED_THIRD_PARTY_LICENCES = 28
50+
PACKAGES_WITH_CHECKED_LICENCE = 29
4951

5052
@staticmethod
5153
def choices() -> List[str]:
@@ -146,6 +148,8 @@ class StaticConfig(GenericConfig):
146148
ORGANISATION_EMAIL = "support@mbed.com"
147149
FILE_LICENCE_IDENTIFIER = "Apache-2.0"
148150
COPYRIGHT_START_DATE = 2020
151+
ACCEPTED_THIRD_PARTY_LICENCES = ["Apache-2.0", "BSD*", "JSON", "MIT", "Python-2.0", "PSF-2.0", "MPL-2.0"]
152+
PACKAGES_WITH_CHECKED_LICENCE: List[str] = []
149153

150154
def _fetch_value(self, key: str) -> Any:
151155
try:

0 commit comments

Comments
 (0)