Skip to content

Commit f423cb7

Browse files
authored
Migrate Xen, Curl, Istio, Mozilla, GitHub OSV and OSS-Fuzz importer (#1946)
* Migrate Xen importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Add tests for Xen importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Migrate CURL importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Fix tests Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Add OSS Fuzz importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Add OSS Fuzz importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Migrate Istio importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Add tests for OSS-FUZZ Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Fix tests Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Add postgresql importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Add mozilla importer Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Fix tests Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Fix linting errors Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> --------- Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 903a8ed commit f423cb7

33 files changed

+1496
-653
lines changed

vulnerabilities/importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from typing import Optional
2323
from typing import Set
2424
from typing import Tuple
25+
from typing import Union
2526

2627
import pytz
2728
from dateutil import parser as dateparser
@@ -361,6 +362,7 @@ class AdvisoryData:
361362
weaknesses: List[int] = dataclasses.field(default_factory=list)
362363
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
363364
url: Optional[str] = None
365+
original_advisory_text: Optional[str] = None
364366

365367
def __post_init__(self):
366368
if self.date_published and not self.date_published.tzinfo:

vulnerabilities/importers/__init__.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,29 +42,41 @@
4242
from vulnerabilities.pipelines import pypa_importer
4343
from vulnerabilities.pipelines import pysec_importer
4444
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
45+
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
4546
from vulnerabilities.pipelines.v2_importers import (
4647
elixir_security_importer as elixir_security_importer_v2,
4748
)
48-
from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2
49+
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
4950
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
51+
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
52+
from vulnerabilities.pipelines.v2_importers import mozilla_importer as mozilla_importer_v2
5053
from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2
5154
from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2
55+
from vulnerabilities.pipelines.v2_importers import oss_fuzz as oss_fuzz_v2
56+
from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
5257
from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
5358
from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
5459
from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2
60+
from vulnerabilities.pipelines.v2_importers import xen_importer as xen_importer_v2
5561
from vulnerabilities.utils import create_registry
5662

5763
IMPORTERS_REGISTRY = create_registry(
5864
[
5965
nvd_importer_v2.NVDImporterPipeline,
6066
elixir_security_importer_v2.ElixirSecurityImporterPipeline,
61-
github_importer_v2.GitHubAPIImporterPipeline,
6267
npm_importer_v2.NpmImporterPipeline,
6368
vulnrichment_importer_v2.VulnrichImporterPipeline,
6469
apache_httpd_v2.ApacheHTTPDImporterPipeline,
6570
pypa_importer_v2.PyPaImporterPipeline,
6671
gitlab_importer_v2.GitLabImporterPipeline,
6772
pysec_importer_v2.PyPIImporterPipeline,
73+
xen_importer_v2.XenImporterPipeline,
74+
curl_importer_v2.CurlImporterPipeline,
75+
oss_fuzz_v2.OSSFuzzImporterPipeline,
76+
istio_importer_v2.IstioImporterPipeline,
77+
postgresql_importer_v2.PostgreSQLImporterPipeline,
78+
mozilla_importer_v2.MozillaImporterPipeline,
79+
github_osv_importer_v2.GithubOSVImporterPipeline,
6880
nvd_importer.NVDImporterPipeline,
6981
github_importer.GitHubAPIImporterPipeline,
7082
gitlab_importer.GitLabImporterPipeline,

vulnerabilities/importers/curl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
9797
... ]
9898
... }
9999
>>> parse_advisory_data(raw_data)
100-
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json')
100+
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
101101
"""
102102

103103
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []

vulnerabilities/importers/osv.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import json
1011
import logging
1112
from typing import Iterable
1213
from typing import List
@@ -109,7 +110,7 @@ def parse_advisory_data(
109110

110111

111112
def parse_advisory_data_v2(
112-
raw_data: dict, supported_ecosystems, advisory_url: str
113+
raw_data: dict, supported_ecosystems, advisory_url: str, advisory_text: str
113114
) -> Optional[AdvisoryData]:
114115
"""
115116
Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and
@@ -173,6 +174,7 @@ def parse_advisory_data_v2(
173174
date_published=date_published,
174175
weaknesses=weaknesses,
175176
url=advisory_url,
177+
original_advisory_text=advisory_text or json.dumps(raw_data, indent=2, ensure_ascii=False),
176178
)
177179

178180

vulnerabilities/improvers/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from vulnerabilities.improvers import valid_versions
1111
from vulnerabilities.improvers import vulnerability_status
1212
from vulnerabilities.pipelines import add_cvss31_to_CVEs
13-
from vulnerabilities.pipelines import collect_commits
1413
from vulnerabilities.pipelines import compute_advisory_todo
1514
from vulnerabilities.pipelines import compute_package_risk
1615
from vulnerabilities.pipelines import compute_package_version_rank
@@ -20,7 +19,6 @@
2019
from vulnerabilities.pipelines import flag_ghost_packages
2120
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2221
from vulnerabilities.pipelines import remove_duplicate_advisories
23-
from vulnerabilities.pipelines.v2_improvers import collect_commits as collect_commits_v2
2422
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
2523
from vulnerabilities.pipelines.v2_improvers import (
2624
computer_package_version_rank as compute_version_rank_v2,
@@ -58,7 +56,6 @@
5856
enhance_with_exploitdb.ExploitDBImproverPipeline,
5957
compute_package_risk.ComputePackageRiskPipeline,
6058
compute_package_version_rank.ComputeVersionRankPipeline,
61-
collect_commits.CollectFixCommitsPipeline,
6259
add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline,
6360
remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline,
6461
populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline,
@@ -68,7 +65,6 @@
6865
enhance_with_metasploit_v2.MetasploitImproverPipeline,
6966
compute_package_risk_v2.ComputePackageRiskPipeline,
7067
compute_version_rank_v2.ComputeVersionRankPipeline,
71-
collect_commits_v2.CollectFixCommitsPipeline,
7268
compute_advisory_todo.ComputeToDo,
7369
]
7470
)
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 4.2.22 on 2025-07-16 08:39
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the optional ``original_advisory_text`` text column to ``advisoryv2``."""

    dependencies = [
        ("vulnerabilities", "0098_alter_advisory_options_alter_advisoryalias_options_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="advisoryv2",
            name="original_advisory_text",
            # Nullable and blank-able: the field may be left unset.
            field=models.TextField(
                help_text="Raw advisory data as collected from the upstream datasource.",
                null=True,
                blank=True,
            ),
        ),
    ]

vulnerabilities/models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2744,6 +2744,12 @@ class AdvisoryV2(models.Model):
27442744
blank=True, null=True, help_text="UTC Date on which the advisory was imported"
27452745
)
27462746

2747+
original_advisory_text = models.TextField(
2748+
blank=True,
2749+
null=True,
2750+
help_text="Raw advisory data as collected from the upstream datasource.",
2751+
)
2752+
27472753
affecting_packages = models.ManyToManyField(
27482754
"PackageV2",
27492755
related_name="affected_by_advisories",

vulnerabilities/pipelines/__init__.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -307,13 +307,20 @@ def collect_and_store_advisories(self):
307307
if advisory is None:
308308
self.log("Advisory is None, skipping")
309309
continue
310-
if _obj := insert_advisory_v2(
311-
advisory=advisory,
312-
pipeline_id=self.pipeline_id,
313-
get_advisory_packages=self.get_advisory_packages,
314-
logger=self.log,
315-
):
316-
collected_advisory_count += 1
310+
try:
311+
if _obj := insert_advisory_v2(
312+
advisory=advisory,
313+
pipeline_id=self.pipeline_id,
314+
get_advisory_packages=self.get_advisory_packages,
315+
logger=self.log,
316+
):
317+
collected_advisory_count += 1
318+
except Exception as e:
319+
self.log(
320+
f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}",
321+
level=logging.ERROR,
322+
)
323+
continue
317324

318325
self.log(f"Successfully collected {collected_advisory_count:,d} advisories")
319326

vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import json
1011
import logging
1112
import re
1213
import urllib.parse
1314
from typing import Iterable
1415

1516
import requests
1617
from bs4 import BeautifulSoup
18+
from dateutil import parser as date_parser
1719
from packageurl import PackageURL
1820
from univers.version_constraint import VersionConstraint
1921
from univers.version_range import ApacheVersionRange
@@ -272,8 +274,11 @@ def to_advisory(self, data):
272274
versions_data.append(version_data)
273275

274276
fixed_versions = []
277+
date_published = None
275278
for timeline_object in data.get("timeline") or []:
276279
timeline_value = timeline_object.get("value")
280+
if timeline_value == "public":
281+
date_published = timeline_object.get("time")
277282
if "release" in timeline_value:
278283
split_timeline_value = timeline_value.split(" ")
279284
if "never" in timeline_value:
@@ -307,6 +312,8 @@ def to_advisory(self, data):
307312
weaknesses=weaknesses,
308313
url=reference.url,
309314
severities=severities,
315+
original_advisory_text=json.dumps(data, indent=2, ensure_ascii=False),
316+
date_published=date_parser.parse(date_published) if date_published else None,
310317
)
311318

312319
def to_version_ranges(self, versions_data, fixed_versions):
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
6+
import json
7+
import logging
8+
from datetime import datetime
9+
from datetime import timezone
10+
from typing import Iterable
11+
12+
from cwe2.database import Database
13+
from packageurl import PackageURL
14+
from univers.version_range import GenericVersionRange
15+
from univers.versions import SemverVersion
16+
17+
from vulnerabilities.importer import AdvisoryData
18+
from vulnerabilities.importer import AffectedPackage
19+
from vulnerabilities.importer import ReferenceV2
20+
from vulnerabilities.importer import VulnerabilitySeverity
21+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
22+
from vulnerabilities.severity_systems import SCORING_SYSTEMS
23+
from vulnerabilities.utils import fetch_response
24+
from vulnerabilities.utils import get_cwe_id
25+
from vulnerabilities.utils import get_item
26+
27+
logger = logging.getLogger(__name__)
28+
29+
30+
class CurlImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Importer pipeline that reads the curl vulnerability JSON feed at ``url``
    and yields one AdvisoryData per valid entry.
    """

    pipeline_id = "curl_importer_v2"
    spdx_license_expression = "curl"
    license_url = "https://curl.se/docs/copyright.html"
    repo_url = "https://github.com/curl/curl-www/"
    url = "https://curl.se/docs/vuln.json"
    unfurl_version_ranges = True

    @classmethod
    def steps(cls):
        """Return the tuple of steps run by this pipeline."""
        return (cls.collect_and_store_advisories,)

    def fetch_data(self):
        """Fetch ``self.url`` and return the decoded JSON payload."""
        response = fetch_response(self.url)
        return response.json()

    def advisories_count(self) -> int:
        """Return the number of entries in a freshly fetched payload."""
        entries = self.fetch_data()
        return len(entries)

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield an AdvisoryData for each fetched entry whose first alias starts
        with "CVE". Other entries are logged as errors and skipped.
        """
        for entry in self.fetch_data():
            alias_list = entry.get("aliases") or []
            cve_id = alias_list[0] if alias_list else None
            if cve_id and cve_id.startswith("CVE"):
                yield parse_curl_advisory(entry)
                continue

            package = get_item(entry, "database_specific", "package")
            logger.error(f"Invalid CVE ID: {cve_id} in package {package}")
61+
62+
63+
def parse_curl_advisory(raw_data) -> AdvisoryData:
    """
    Return an AdvisoryData built from ``raw_data``, a single entry of the curl
    vulnerability JSON feed.

    Args:
        raw_data (dict): Raw JSON mapping describing one curl advisory. It is
            not modified by this function.

    Returns:
        AdvisoryData: The parsed advisory. Its ``original_advisory_text`` is the
        JSON dump of ``raw_data`` as received, and ``affected_packages`` is
        empty when ``raw_data`` has no usable "affected" entry.

    Raises:
        ValueError: If "published" is set but does not match the
            ``%Y-%m-%dT%H:%M:%S.%fZ`` format.
    """
    # Serialize up front so the stored text is the upstream entry as collected.
    original_advisory_text = json.dumps(raw_data, indent=2, ensure_ascii=False)

    # Only the first "affected" entry and its first range are considered.
    # Missing or empty keys fall back to empty containers instead of raising.
    affected_entries = raw_data.get("affected") or []
    affected = affected_entries[0] if affected_entries else {}

    affected_ranges = affected.get("ranges") or []
    first_range = affected_ranges[0] if affected_ranges else {}
    events = first_range.get("events") or []
    # The fixed version is read from the second event of the range.
    fix_event = events[1] if len(events) > 1 else {}
    version_type = first_range.get("type") or ""
    fixed_version = fix_event.get("fixed")
    if version_type == "SEMVER" and fixed_version:
        fixed_version = SemverVersion(fixed_version)

    affected_packages = []
    if affected:
        purl = PackageURL(type="generic", namespace="curl.se", name="curl")
        versions = affected.get("versions") or []
        affected_packages.append(
            AffectedPackage(
                package=purl,
                affected_version_range=GenericVersionRange.from_versions(versions),
                fixed_version=fixed_version,
            )
        )

    database_specific = raw_data.get("database_specific") or {}
    www_url = database_specific.get("www")
    issue_url = database_specific.get("issue")
    json_url = database_specific.get("URL")

    references = [ReferenceV2(url=url) for url in (www_url, issue_url) if url]

    severity = VulnerabilitySeverity(
        system=SCORING_SYSTEMS["cvssv3.1"],
        value=database_specific.get("severity", ""),
        url=www_url,
    )

    published = raw_data.get("published") or ""
    date_published = None
    if published:
        date_published = datetime.strptime(published, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
            tzinfo=timezone.utc
        )

    weaknesses = get_cwe_from_curl_advisory(raw_data)

    advisory_id = raw_data.get("id") or ""
    # Build a new list rather than calling ``remove`` on the list owned by
    # ``raw_data``: the advisory id must not be repeated as an alias, but the
    # caller's data has to stay untouched.
    aliases = [alias for alias in (raw_data.get("aliases") or []) if alias != advisory_id]

    return AdvisoryData(
        advisory_id=advisory_id,
        aliases=aliases,
        summary=raw_data.get("summary") or "",
        affected_packages=affected_packages,
        references_v2=references,
        date_published=date_published,
        weaknesses=weaknesses,
        url=json_url,
        severities=[severity],
        original_advisory_text=original_advisory_text,
    )
134+
135+
136+
def get_cwe_from_curl_advisory(raw_data):
    """
    Return a list with the CWE number found under
    ``database_specific -> CWE -> id`` in ``raw_data``. Return an empty list
    when that id is empty, or when extracting or validating it raises.

    >>> get_cwe_from_curl_advisory({"database_specific": {"CWE": {"id": "CWE-333"}}})
    [333]
    >>> get_cwe_from_curl_advisory({"database_specific": {"CWE": {"id": ""}}})
    []
    """
    cwe_database = Database()
    cwe_string = get_item(raw_data, "database_specific", "CWE", "id") or ""
    if not cwe_string:
        return []

    try:
        cwe_id = get_cwe_id(cwe_string)
        # Lookup is done only to validate that the CWE exists.
        cwe_database.get(cwe_id)
    except Exception:
        logger.error(f"Invalid CWE id: {cwe_string}")
        return []

    return [cwe_id]

0 commit comments

Comments
 (0)