-
-
Notifications
You must be signed in to change notification settings - Fork 244
Add support for affected_by_commits, fixed_by_commits, and OSV code fix commits #2017
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
a8ec9f1
be7dd91
c1d1f21
b505021
b882fa4
594ca0b
25a3f0e
619fb7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,8 +37,8 @@ | |
| from vulnerabilities.severity_systems import ScoringSystem | ||
| from vulnerabilities.utils import classproperty | ||
| from vulnerabilities.utils import get_reference_id | ||
| from vulnerabilities.utils import is_commit | ||
| from vulnerabilities.utils import is_cve | ||
| from vulnerabilities.utils import nearest_patched_package | ||
| from vulnerabilities.utils import purl_to_dict | ||
| from vulnerabilities.utils import update_purl_version | ||
|
|
||
|
|
@@ -194,6 +194,64 @@ def from_url(cls, url): | |
| return cls(url=url) | ||
|
|
||
|
|
||
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class CodeCommitData:
    """
    A code commit identified by its hash and the URL of its VCS repository,
    with an optional author, message and date.

    Instances compare equal field by field and are ordered by ``_cmp_key()``.
    """

    commit_hash: str
    vcs_url: str

    commit_author: Optional[str] = None
    commit_message: Optional[str] = None
    commit_date: Optional[datetime.datetime] = None

    def __post_init__(self):
        """
        Validate the required fields.

        Raise ValueError when ``commit_hash`` is empty or rejected by
        ``is_commit()``, or when ``vcs_url`` is empty.
        """
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")

        if not is_commit(self.commit_hash):
            raise ValueError("Commit must have a valid commit_hash.")

        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")

    def __lt__(self, other):
        """Order commits by ``_cmp_key()``; defer to ``other`` for foreign types."""
        if not isinstance(other, CodeCommitData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    # TODO: Add cache
    def _cmp_key(self):
        """
        Return the tuple used to order commits.

        Each optional field is wrapped as ``(is_set, value)`` so that a missing
        value (None) sorts before any real value instead of raising TypeError
        when None is compared with a str or a datetime.
        """
        optional_fields = (self.commit_author, self.commit_message, self.commit_date)
        return (
            self.commit_hash,
            self.vcs_url,
            *((value is not None, value) for value in optional_fields),
        )

    def to_dict(self) -> dict:
        """
        Return a normalized dictionary representation of the commit.

        ``commit_date`` is serialized as an ISO 8601 string (or None) so that
        the result can be fed back to ``from_dict()``.
        """
        return {
            "commit_hash": self.commit_hash,
            "vcs_url": self.vcs_url,
            "commit_author": self.commit_author,
            "commit_message": self.commit_message,
            "commit_date": self.commit_date.isoformat() if self.commit_date else None,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a CodeCommitData instance from a ``data`` mapping such as the
        one returned by ``to_dict()``.

        ``commit_date`` may be an ISO 8601 string, a datetime, or missing.
        Raise ValueError (from ``__post_init__``) when ``commit_hash`` or
        ``vcs_url`` is missing or empty.
        """
        # Review feedback on this method: there is no point in creating commits
        # from empty strings; if the commit hash is not present, no commit is
        # created at all. A missing or None hash therefore stays empty (it is
        # not turned into the string "None") and is rejected by __post_init__.
        commit_hash = data.get("commit_hash")
        commit_hash = "" if commit_hash is None else str(commit_hash)

        commit_date = data.get("commit_date")
        if not commit_date:
            commit_date = None
        elif isinstance(commit_date, str):
            commit_date = datetime.datetime.fromisoformat(commit_date)

        return cls(
            commit_hash=commit_hash,
            vcs_url=data.get("vcs_url", ""),
            commit_author=data.get("commit_author"),
            commit_message=data.get("commit_message"),
            commit_date=commit_date,
        )
|
|
||
|
|
||
| class UnMergeablePackageError(Exception): | ||
| """ | ||
| Raised when a package cannot be merged with another one. | ||
|
|
@@ -444,6 +502,8 @@ class AdvisoryData: | |
| date_published: Optional[datetime.datetime] = None | ||
| weaknesses: List[int] = dataclasses.field(default_factory=list) | ||
| severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list) | ||
| fixed_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list) | ||
TG1999 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| affected_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list) | ||
|
Comment on lines
+505
to
+506
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not correct. That's the only way to properly capture which version/commit range is fixed by which version/commit. An advisory can have multiple such sets of affected and fixed versions/commits.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @keshav-space Yes, I think it should be part of AffectedPackage, but this would force us to create an ImpactedPackage relation before we can store fix commits. For example, if we have a pipeline that loops over 1,000 Git repositories to extract fixed commits, we might not be able to get ImpactedPackages (because there’s no purl provided for AffectedPackage). Therefore, I think […]. I'm not sure, but I can reimplement this if needed; I thought we had a previous discussion on this, @TG1999.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Don't you have a VCS URL for this commit? We can create a purl from the VCS URL. Can you share some examples?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's say we have a Swift package: The possible purls for this package could be:
Now, consider another example - perhaps a GitLab or Android repository: We can have multiple purls, but if no purl is provided by the data source, we might need to allow some tolerance in how we store the CodeCommit with ImpactedPackage.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ziadhany In both of the examples you shared, I see no reason why we cannot create […]. And for https://github.com/LiYanan2004/SFSymbolKit, at the very least we should create an AffectedPackage with a GitHub purl.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Then how will we store the VCS URL and commit hashes, since the current AffectedPackage data class does not support any of this?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That’s the change this PR should make i.e. to add the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are two ways we import the data: we need to know which is the best way OR
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we're going with the second one. |
||
| url: Optional[str] = None | ||
| original_advisory_text: Optional[str] = None | ||
|
|
||
|
|
@@ -476,6 +536,12 @@ def to_dict(self): | |
| "severities": [sev.to_dict() for sev in self.severities], | ||
| "date_published": self.date_published.isoformat() if self.date_published else None, | ||
| "weaknesses": self.weaknesses, | ||
| "affected_by_commits": [ | ||
| affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits | ||
| ], | ||
| "fixed_by_commits": [ | ||
| fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits | ||
| ], | ||
| "url": self.url if self.url else "", | ||
| } | ||
| return { | ||
|
|
@@ -536,6 +602,8 @@ class AdvisoryDataV2: | |
| date_published: Optional[datetime.datetime] = None | ||
| weaknesses: List[int] = dataclasses.field(default_factory=list) | ||
| url: Optional[str] = None | ||
| fixed_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list) | ||
| affected_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list) | ||
|
|
||
| def __post_init__(self): | ||
| if self.date_published and not self.date_published.tzinfo: | ||
|
|
@@ -559,6 +627,12 @@ def to_dict(self): | |
| "references": [ref.to_dict() for ref in self.references], | ||
| "date_published": self.date_published.isoformat() if self.date_published else None, | ||
| "weaknesses": self.weaknesses, | ||
| "affected_by_commits": [ | ||
| affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits | ||
| ], | ||
| "fixed_by_commits": [ | ||
| fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits | ||
| ], | ||
| "url": self.url if self.url else "", | ||
| } | ||
|
|
||
|
|
@@ -578,6 +652,14 @@ def from_dict(cls, advisory_data): | |
| if date_published | ||
| else None, | ||
| "weaknesses": advisory_data["weaknesses"], | ||
| "affected_by_commits": [ | ||
| CodeCommitData.from_dict(affected_by_commit) | ||
| for affected_by_commit in advisory_data["affected_by_commits"] | ||
| ], | ||
| "fixed_by_commits": [ | ||
| CodeCommitData.from_dict(fixed_by_commit) | ||
| for fixed_by_commit in advisory_data["fixed_by_commits"] | ||
| ], | ||
| "url": advisory_data.get("url") or None, | ||
| } | ||
| return cls(**transformed) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.