Skip to content

Commit f78161a

Browse files
committed
parser: close a TOCTTOU bug on Person creation
find_author looks up a person by email, and if they do not exist, creates a Person model, which may be saved later if the message contains something valuable. Multiple simultaneous processes can race here: both can do the SELECT, find there is no Person, and create the model. One will succeed in saving, the other will get an IntegrityError. Reduce the window by making find_author into get_or_create_author, and plumb that through. (Remove a test that specifically required find_author to *not* create). More importantly, cover the case where we lose the race, by using get_or_create which handles the race case, catching the IntegrityError internally and fetching the winning Person model. Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> [dja: post review cleanup of now-unused import] Signed-off-by: Daniel Axtens <dja@axtens.net>
1 parent f66261e commit f78161a

File tree

2 files changed

+34
-38
lines changed

2 files changed

+34
-38
lines changed

patchwork/parser.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def _find_series_by_references(project, mail):
238238
continue
239239

240240

241-
def _find_series_by_markers(project, mail):
241+
def _find_series_by_markers(project, mail, author):
242242
"""Find a patch's series using series markers and sender.
243243
244244
Identify suitable series for a patch using a combination of the
@@ -259,7 +259,6 @@ def _find_series_by_markers(project, mail):
259259
still won't help us if someone spams the mailing list with
260260
duplicate series but that's a tricky situation for anyone to parse.
261261
"""
262-
author = find_author(mail)
263262

264263
subject = mail.get('Subject')
265264
name, prefixes = clean_subject(subject, [project.linkname])
@@ -279,7 +278,7 @@ def _find_series_by_markers(project, mail):
279278
return
280279

281280

282-
def find_series(project, mail):
281+
def find_series(project, mail, author):
283282
"""Find a series, if any, for a given patch.
284283
285284
Args:
@@ -294,10 +293,10 @@ def find_series(project, mail):
294293
if series:
295294
return series
296295

297-
return _find_series_by_markers(project, mail)
296+
return _find_series_by_markers(project, mail, author)
298297

299298

300-
def find_author(mail):
299+
def get_or_create_author(mail):
301300
from_header = clean_header(mail.get('From'))
302301

303302
if not from_header:
@@ -338,12 +337,17 @@ def find_author(mail):
338337
if name is not None:
339338
name = name.strip()[:255]
340339

341-
try:
342-
person = Person.objects.get(email__iexact=email)
343-
if name: # use the latest provided name
344-
person.name = name
345-
except Person.DoesNotExist:
346-
person = Person(name=name, email=email)
340+
# this correctly handles the case where we lose the race to create
341+
# the person and another process beats us to it. (If the record
342+
# does not exist, g_o_c invokes _create_object_from_params which
343+
# catches the IntegrityError and repeats the SELECT.)
344+
person = Person.objects.get_or_create(email__iexact=email,
345+
defaults={'name': name,
346+
'email': email})[0]
347+
348+
if name: # use the latest provided name
349+
person.name = name
350+
person.save()
347351

348352
return person
349353

@@ -944,7 +948,6 @@ def parse_mail(mail, list_id=None):
944948
raise ValueError("Broken 'Message-Id' header")
945949
msgid = msgid[:255]
946950

947-
author = find_author(mail)
948951
subject = mail.get('Subject')
949952
name, prefixes = clean_subject(subject, [project.linkname])
950953
is_comment = subject_check(subject)
@@ -970,7 +973,7 @@ def parse_mail(mail, list_id=None):
970973

971974
if not is_comment and (diff or pull_url): # patches or pull requests
972975
# we delay the saving until we know we have a patch.
973-
author.save()
976+
author = get_or_create_author(mail)
974977

975978
delegate = find_delegate_by_header(mail)
976979
if not delegate and diff:
@@ -981,7 +984,7 @@ def parse_mail(mail, list_id=None):
981984
# series to match against.
982985
series = None
983986
if n:
984-
series = find_series(project, mail)
987+
series = find_series(project, mail, author)
985988
else:
986989
x = n = 1
987990

@@ -1058,7 +1061,7 @@ def parse_mail(mail, list_id=None):
10581061
is_cover_letter = True
10591062

10601063
if is_cover_letter:
1061-
author.save()
1064+
author = get_or_create_author(mail)
10621065

10631066
# we don't use 'find_series' here as a cover letter will
10641067
# always be the first item in a thread, thus the references
@@ -1106,7 +1109,7 @@ def parse_mail(mail, list_id=None):
11061109
if not submission:
11071110
return
11081111

1109-
author.save()
1112+
author = get_or_create_author(mail)
11101113

11111114
comment = Comment(
11121115
submission=submission,

patchwork/tests/test_parser.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from patchwork.models import Person
3535
from patchwork.models import State
3636
from patchwork.parser import clean_subject
37-
from patchwork.parser import find_author
37+
from patchwork.parser import get_or_create_author
3838
from patchwork.parser import find_patch_content as find_content
3939
from patchwork.parser import find_project_by_header
4040
from patchwork.parser import find_series
@@ -225,7 +225,7 @@ def _create_email(from_header):
225225

226226
def _test_encoding(self, from_header, sender_name, sender_email):
227227
email = self._create_email(from_header)
228-
person = find_author(email)
228+
person = get_or_create_author(email)
229229
person.save()
230230

231231
# ensure it was parsed correctly
@@ -241,7 +241,7 @@ def _test_encoding(self, from_header, sender_name, sender_email):
241241
def test_empty(self):
242242
email = self._create_email('')
243243
with self.assertRaises(ValueError):
244-
find_author(email)
244+
get_or_create_author(email)
245245

246246
def test_ascii_encoding(self):
247247
from_header = 'example user <user@example.com>'
@@ -269,7 +269,7 @@ def test_utf8b64_encoding(self):
269269

270270

271271
class SenderCorrelationTest(TestCase):
272-
"""Validate correct behavior of the find_author case.
272+
"""Validate correct behavior of the get_or_create_author case.
273273
274274
Relies of checking the internal state of a Django model object.
275275
@@ -284,25 +284,16 @@ def _create_email(from_header):
284284
'test\n'
285285
return message_from_string(mail)
286286

287-
def test_non_existing_sender(self):
288-
sender = 'Non-existing Sender <nonexisting@example.com>'
289-
mail = self._create_email(sender)
290-
291-
# don't create the person - attempt to find immediately
292-
person = find_author(mail)
293-
self.assertEqual(person._state.adding, True)
294-
self.assertEqual(person.id, None)
295-
296287
def test_existing_sender(self):
297288
sender = 'Existing Sender <existing@example.com>'
298289
mail = self._create_email(sender)
299290

300291
# create the person first
301-
person_a = find_author(mail)
292+
person_a = get_or_create_author(mail)
302293
person_a.save()
303294

304295
# then attempt to parse email with the same 'From' line
305-
person_b = find_author(mail)
296+
person_b = get_or_create_author(mail)
306297
self.assertEqual(person_b._state.adding, False)
307298
self.assertEqual(person_b.id, person_a.id)
308299

@@ -311,24 +302,24 @@ def test_existing_different_format(self):
311302
mail = self._create_email(sender)
312303

313304
# create the person first
314-
person_a = find_author(mail)
305+
person_a = get_or_create_author(mail)
315306
person_a.save()
316307

317308
# then attempt to parse email with a new 'From' line
318309
mail = self._create_email('existing@example.com')
319-
person_b = find_author(mail)
310+
person_b = get_or_create_author(mail)
320311
self.assertEqual(person_b._state.adding, False)
321312
self.assertEqual(person_b.id, person_a.id)
322313

323314
def test_existing_different_case(self):
324315
sender = 'Existing Sender <existing@example.com>'
325316
mail = self._create_email(sender)
326317

327-
person_a = find_author(mail)
318+
person_a = get_or_create_author(mail)
328319
person_a.save()
329320

330321
mail = self._create_email(sender.upper())
331-
person_b = find_author(mail)
322+
person_b = get_or_create_author(mail)
332323
self.assertEqual(person_b._state.adding, False)
333324
self.assertEqual(person_b.id, person_a.id)
334325

@@ -361,7 +352,8 @@ def test_new_series(self):
361352
email = self._create_email(msgid)
362353
project = create_project()
363354

364-
self.assertIsNone(find_series(project, email))
355+
self.assertIsNone(find_series(project, email,
356+
get_or_create_author(email)))
365357

366358
def test_first_reply(self):
367359
msgid_a = make_msgid()
@@ -371,7 +363,8 @@ def test_first_reply(self):
371363
# assume msgid_a was already handled
372364
ref = create_series_reference(msgid=msgid_a)
373365

374-
series = find_series(ref.series.project, email)
366+
series = find_series(ref.series.project, email,
367+
get_or_create_author(email))
375368
self.assertEqual(series, ref.series)
376369

377370
def test_nested_series(self):
@@ -395,7 +388,7 @@ def test_nested_series(self):
395388
# ...and the "first patch" of this new series
396389
msgid = make_msgid()
397390
email = self._create_email(msgid, msgids)
398-
series = find_series(project, email)
391+
series = find_series(project, email, get_or_create_author(email))
399392

400393
# this should link to the second series - not the first
401394
self.assertEqual(len(msgids), 4 + 1) # old series + new cover

0 commit comments

Comments
 (0)