Skip to content

Commit 550f725

Browse files
committed
Optimize creation of TODO and advisory TODO M2M relations
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent f017687 commit 550f725

File tree

4 files changed

+187
-48
lines changed

4 files changed

+187
-48
lines changed

vulnerabilities/migrations/0093_advisorytodo.py renamed to vulnerabilities/migrations/0093_advisorytodo_todorelatedadvisory_and_more.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
# Generated by Django 4.2.20 on 2025-06-03 18:13
1+
# Generated by Django 4.2.22 on 2025-06-27 15:59
22

33
from django.db import migrations, models
4+
import django.db.models.deletion
45

56

67
class Migration(migrations.Migration):
@@ -90,17 +91,47 @@ class Migration(migrations.Migration):
9091
blank=True, help_text="Additional detail on how this TODO was resolved."
9192
),
9293
),
94+
],
95+
),
96+
migrations.CreateModel(
97+
name="ToDoRelatedAdvisory",
98+
fields=[
9399
(
94-
"advisories",
95-
models.ManyToManyField(
96-
help_text="Advisory/ies where this TODO is applicable.",
97-
related_name="advisory_todos",
98-
to="vulnerabilities.advisory",
100+
"id",
101+
models.AutoField(
102+
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
103+
),
104+
),
105+
(
106+
"advisory",
107+
models.ForeignKey(
108+
on_delete=django.db.models.deletion.CASCADE, to="vulnerabilities.advisory"
109+
),
110+
),
111+
(
112+
"todo",
113+
models.ForeignKey(
114+
on_delete=django.db.models.deletion.CASCADE,
115+
to="vulnerabilities.advisorytodo",
99116
),
100117
),
101118
],
102119
options={
103-
"unique_together": {("related_advisories_id", "issue_type")},
120+
"unique_together": {("todo", "advisory")},
104121
},
105122
),
123+
migrations.AddField(
124+
model_name="advisorytodo",
125+
name="advisories",
126+
field=models.ManyToManyField(
127+
help_text="Advisory/ies where this TODO is applicable.",
128+
related_name="advisory_todos",
129+
through="vulnerabilities.ToDoRelatedAdvisory",
130+
to="vulnerabilities.advisory",
131+
),
132+
),
133+
migrations.AlterUniqueTogether(
134+
name="advisorytodo",
135+
unique_together={("related_advisories_id", "issue_type")},
136+
),
106137
]

vulnerabilities/models.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2286,16 +2286,19 @@ class AdvisoryToDo(models.Model):
22862286
# to avoid creating duplicate issue for same set of advisories,
22872287
related_advisories_id = models.CharField(
22882288
max_length=40,
2289-
blank=False,
2290-
null=False,
22912289
help_text="SHA1 digest of the unique_content_id field of the applicable advisories.",
22922290
)
22932291

2292+
advisories = models.ManyToManyField(
2293+
Advisory,
2294+
through="ToDoRelatedAdvisory",
2295+
related_name="advisory_todos",
2296+
help_text="Advisory/ies where this TODO is applicable.",
2297+
)
2298+
22942299
issue_type = models.CharField(
22952300
max_length=50,
22962301
choices=ISSUE_TYPE_CHOICES,
2297-
blank=False,
2298-
null=False,
22992302
db_index=True,
23002303
help_text="Select the issue that needs to be addressed from the available options.",
23012304
)
@@ -2305,12 +2308,6 @@ class AdvisoryToDo(models.Model):
23052308
help_text="Additional details about the issue.",
23062309
)
23072310

2308-
advisories = models.ManyToManyField(
2309-
Advisory,
2310-
related_name="advisory_todos",
2311-
help_text="Advisory/ies where this TODO is applicable.",
2312-
)
2313-
23142311
created_at = models.DateTimeField(
23152312
auto_now_add=True,
23162313
help_text="Timestamp indicating when this TODO was created.",
@@ -2339,3 +2336,18 @@ class Meta:
23392336
def save(self, *args, **kwargs):
23402337
self.full_clean()
23412338
return super().save(*args, **kwargs)
2339+
2340+
2341+
class ToDoRelatedAdvisory(models.Model):
    """Join row linking one AdvisoryToDo to one Advisory."""

    todo = models.ForeignKey(AdvisoryToDo, on_delete=models.CASCADE)
    advisory = models.ForeignKey(Advisory, on_delete=models.CASCADE)

    class Meta:
        # A given advisory can be attached to a given ToDo only once.
        unique_together = ("todo", "advisory")

vulnerabilities/pipelines/compute_advisory_todo.py

Lines changed: 112 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
import json
1212

1313
from aboutcode.pipeline import LoopProgress
14+
from django.utils import timezone
1415

1516
from vulnerabilities.models import Advisory
1617
from vulnerabilities.models import AdvisoryToDo
1718
from vulnerabilities.models import Alias
19+
from vulnerabilities.models import ToDoRelatedAdvisory
1820
from vulnerabilities.pipelines import VulnerableCodePipeline
1921
from vulnerabilities.pipes.advisory import advisories_checksum
2022

@@ -32,8 +34,14 @@ def steps(cls):
3234
)
3335

3436
def compute_individual_advisory_todo(self):
35-
advisories = Advisory.objects.all().iterator(chunk_size=5000)
36-
advisories_count = Advisory.objects.all().count()
37+
"""Create ToDos for missing summary, affected and fixed packages."""
38+
39+
advisories = Advisory.objects.all()
40+
advisories_count = advisories.count()
41+
advisory_relation_to_create = {}
42+
todo_to_create = []
43+
new_todos_count = 0
44+
batch_size = 5000
3745

3846
self.log(
3947
f"Checking missing summary, affected and fixed packages in {advisories_count} Advisories"
@@ -43,23 +51,48 @@ def compute_individual_advisory_todo(self):
4351
logger=self.log,
4452
progress_step=1,
4553
)
46-
for advisory in progress.iter(advisories):
54+
for advisory in progress.iter(advisories.iterator(chunk_size=5000)):
4755
advisory_todo_id = advisories_checksum(advisories=advisory)
4856
check_missing_summary(
4957
advisory=advisory,
5058
todo_id=advisory_todo_id,
51-
logger=self.log,
59+
todo_to_create=todo_to_create,
60+
advisory_relation_to_create=advisory_relation_to_create,
5261
)
5362

5463
check_missing_affected_and_fixed_by_packages(
5564
advisory=advisory,
5665
todo_id=advisory_todo_id,
57-
logger=self.log,
66+
todo_to_create=todo_to_create,
67+
advisory_relation_to_create=advisory_relation_to_create,
5868
)
5969

70+
if len(todo_to_create) > batch_size:
71+
new_todos_count += bulk_create_with_m2m(
72+
todos=todo_to_create,
73+
advisories=advisory_relation_to_create,
74+
logger=self.log,
75+
)
76+
advisory_relation_to_create.clear()
77+
todo_to_create.clear()
78+
79+
new_todos_count += bulk_create_with_m2m(
80+
todos=todo_to_create,
81+
advisories=advisory_relation_to_create,
82+
logger=self.log,
83+
)
84+
6085
def detect_conflicting_advisories(self):
86+
"""
87+
Create ToDos for advisories with conflicting opinions on fixed and affected
88+
package versions for a vulnerability.
89+
"""
6190
aliases = Alias.objects.filter(alias__istartswith="cve")
6291
aliases_count = aliases.count()
92+
advisory_relation_to_create = {}
93+
todo_to_create = []
94+
new_todos_count = 0
95+
batch_size = 5000
6396

6497
self.log(f"Cross validating advisory affected and fixed package for {aliases_count} CVEs")
6598

@@ -73,24 +106,50 @@ def detect_conflicting_advisories(self):
73106
advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES"
74107
).distinct()
75108

76-
check_conflicting_affected_and_fixed_by_packages(
109+
check_conflicting_affected_and_fixed_by_packages_for_alias(
77110
advisories=advisories,
78111
cve=alias,
79-
logger=self.log,
112+
todo_to_create=todo_to_create,
113+
advisory_relation_to_create=advisory_relation_to_create,
80114
)
81115

116+
if len(todo_to_create) > batch_size:
117+
new_todos_count += bulk_create_with_m2m(
118+
todos=todo_to_create,
119+
advisories=advisory_relation_to_create,
120+
logger=self.log,
121+
)
122+
advisory_relation_to_create.clear()
123+
todo_to_create.clear()
124+
125+
new_todos_count += bulk_create_with_m2m(
126+
todos=todo_to_create,
127+
advisories=advisory_relation_to_create,
128+
logger=self.log,
129+
)
82130

83-
def check_missing_summary(advisory, todo_id, logger=None):
131+
132+
def check_missing_summary(
    advisory,
    todo_id,
    todo_to_create,
    advisory_relation_to_create,
):
    """
    Queue a MISSING_SUMMARY ToDo for ``advisory`` when it has no summary.

    The unsaved ToDo is appended to ``todo_to_create`` and ``advisory`` is
    recorded under ``todo_id`` in ``advisory_relation_to_create``. This
    function only fills these two collections; it does not save anything.
    """
    # Guard clause: advisories that already have a summary need no ToDo.
    if advisory.summary:
        return

    missing_summary_todo = AdvisoryToDo(
        related_advisories_id=todo_id,
        issue_type="MISSING_SUMMARY",
    )
    advisory_relation_to_create[todo_id] = [advisory]
    todo_to_create.append(missing_summary_todo)
91145

92146

93-
def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None):
147+
def check_missing_affected_and_fixed_by_packages(
148+
advisory,
149+
todo_id,
150+
todo_to_create,
151+
advisory_relation_to_create,
152+
):
94153
"""
95154
Check for missing affected or fixed-by packages in the advisory
96155
and create appropriate AdvisoryToDo.
@@ -121,15 +180,21 @@ def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None)
121180
issue_type = "MISSING_AFFECTED_PACKAGE"
122181
elif not has_fixed_package:
123182
issue_type = "MISSING_FIXED_BY_PACKAGE"
124-
todo, created = AdvisoryToDo.objects.get_or_create(
183+
184+
todo = AdvisoryToDo(
125185
related_advisories_id=todo_id,
126186
issue_type=issue_type,
127187
)
128-
if created:
129-
todo.advisories.add(advisory)
188+
todo_to_create.append(todo)
189+
advisory_relation_to_create[todo_id] = [advisory]
130190

131191

132-
def check_conflicting_affected_and_fixed_by_packages(advisories, cve, logger=None):
192+
def check_conflicting_affected_and_fixed_by_packages_for_alias(
193+
advisories,
194+
cve,
195+
todo_to_create,
196+
advisory_relation_to_create,
197+
):
133198
"""
134199
Add appropriate AdvisoryToDo for conflicting affected/fixed packages.
135200
@@ -222,15 +287,13 @@ def check_conflicting_affected_and_fixed_by_packages(advisories, cve, logger=Non
222287
messages.append("Comparison matrix:")
223288
messages.append(json.dumps(matrix, indent=2, default=list))
224289
todo_id = advisories_checksum(advisories)
225-
todo, created = AdvisoryToDo.objects.get_or_create(
290+
todo = AdvisoryToDo(
226291
related_advisories_id=todo_id,
227292
issue_type=issue_type,
228-
defaults={
229-
"issue_detail": "\n".join(messages),
230-
},
293+
issue_detail="\n".join(messages),
231294
)
232-
if created:
233-
todo.advisories.add(*advisories)
295+
todo_to_create.append(todo)
296+
advisory_relation_to_create[todo_id] = list(advisories)
234297

235298

236299
def initialize_sub_matrix(matrix, affected_purl, advisory):
@@ -245,3 +308,30 @@ def initialize_sub_matrix(matrix, affected_purl, advisory):
245308
matrix[affected_purl]["affected"][advisory_id] = set()
246309
if advisory not in matrix[affected_purl]["fixed"]:
247310
matrix[affected_purl]["fixed"][advisory_id] = set()
311+
312+
313+
def bulk_create_with_m2m(todos, advisories, logger):
    """
    Bulk create ToDos and also bulk create M2M ToDo Advisory relationships.

    ``todos`` is a list of unsaved AdvisoryToDo objects. ``advisories`` maps a
    ``related_advisories_id`` to the list of advisories to attach to the
    ToDo(s) carrying that id. ``logger`` is a callable taking a message string.

    Return the number of ToDos of this batch that were newly created. ToDos
    that already existed are skipped by ``ignore_conflicts`` and get no new
    relation rows.
    """
    if not todos:
        return 0

    # With ignore_conflicts the insert does not report which rows were
    # actually created, so the new rows are looked up again through their
    # ``created_at`` timestamp.
    start_time = timezone.now()
    try:
        AdvisoryToDo.objects.bulk_create(objs=todos, ignore_conflicts=True)
    except Exception as e:
        logger(f"Error creating AdvisoryToDo: {e}")
        # Do not try to build relations on top of a failed ToDo insert.
        return 0

    # The timestamp window may also match ToDos written concurrently by
    # another process. Keep only the ones whose id belongs to this batch so
    # that a foreign row can neither raise KeyError nor inflate the count.
    new_todos = [
        todo
        for todo in AdvisoryToDo.objects.filter(created_at__gte=start_time)
        if todo.related_advisories_id in advisories
    ]

    relations = [
        ToDoRelatedAdvisory(todo=todo, advisory=advisory)
        for todo in new_todos
        for advisory in advisories[todo.related_advisories_id]
    ]

    try:
        # ignore_conflicts keeps a single duplicate (todo, advisory) pair
        # from aborting the whole batch of relations.
        ToDoRelatedAdvisory.objects.bulk_create(relations, ignore_conflicts=True)
    except Exception as e:
        logger(f"Error creating Advisory ToDo relations: {e}")

    return len(new_todos)

vulnerabilities/tests/pipelines/test_compute_advisory_todo.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,10 @@ def test_advisory_todo_missing_summary(self):
8888
pipeline = ComputeToDo()
8989
pipeline.execute()
9090

91-
todos = AdvisoryToDo.objects.first()
91+
todo = AdvisoryToDo.objects.first()
9292
self.assertEqual(1, AdvisoryToDo.objects.count())
93-
self.assertEqual("MISSING_SUMMARY", todos.issue_type)
93+
self.assertEqual("MISSING_SUMMARY", todo.issue_type)
94+
self.assertEqual(1, todo.advisories.count())
9495

9596
def test_advisory_todo_missing_fixed(self):
9697
date = datetime.now()
@@ -107,9 +108,10 @@ def test_advisory_todo_missing_fixed(self):
107108
pipeline = ComputeToDo()
108109
pipeline.execute()
109110

110-
todos = AdvisoryToDo.objects.first()
111+
todo = AdvisoryToDo.objects.first()
111112
self.assertEqual(1, AdvisoryToDo.objects.count())
112-
self.assertEqual("MISSING_FIXED_BY_PACKAGE", todos.issue_type)
113+
self.assertEqual("MISSING_FIXED_BY_PACKAGE", todo.issue_type)
114+
self.assertEqual(1, todo.advisories.count())
113115

114116
def test_advisory_todo_missing_affected(self):
115117
date = datetime.now()
@@ -126,9 +128,10 @@ def test_advisory_todo_missing_affected(self):
126128
pipeline = ComputeToDo()
127129
pipeline.execute()
128130

129-
todos = AdvisoryToDo.objects.first()
131+
todo = AdvisoryToDo.objects.first()
130132
self.assertEqual(1, AdvisoryToDo.objects.count())
131-
self.assertEqual("MISSING_AFFECTED_PACKAGE", todos.issue_type)
133+
self.assertEqual("MISSING_AFFECTED_PACKAGE", todo.issue_type)
134+
self.assertEqual(1, todo.advisories.count())
132135

133136
def test_advisory_todo_conflicting_fixed_affected(self):
134137
alias = Alias.objects.create(alias="CVE-0000-0000")
@@ -156,12 +159,15 @@ def test_advisory_todo_conflicting_fixed_affected(self):
156159
)
157160
adv2.aliases.add(alias)
158161

162+
self.assertEqual(0, AdvisoryToDo.objects.count())
159163
pipeline = ComputeToDo()
160164
pipeline.execute()
161165

162-
todos = AdvisoryToDo.objects.first()
166+
todo = AdvisoryToDo.objects.first()
163167
self.assertEqual(1, AdvisoryToDo.objects.count())
164-
self.assertEqual("CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", todos.issue_type)
168+
self.assertEqual("CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", todo.issue_type)
165169
self.assertIn(
166-
"CVE-0000-0000: pkg:npm/package1 with conflicting fixed version", todos.issue_detail
170+
"CVE-0000-0000: pkg:npm/package1 with conflicting fixed version", todo.issue_detail
167171
)
172+
self.assertEqual(2, todo.advisories.count())
173+
self.assertEqual(todo, adv2.advisory_todos.first())

0 commit comments

Comments
 (0)