Skip to content

Commit 9917c0d

Browse files
moijes12Archaeopteryx
authored andcommitted
Bug 1387418: Handle empty push resultset from hg (#9074)
* Bug 1387418: Handle empty push resultset from hg This patch parses the hg push resultset and handles cases where pushes are not present by raising an appropriate error and logging the error correctly. * Bug 1387418: Address review comments Made the below changes to push_loader.py * Used an f-string in line 267 to log exception * Corrected comment where we log data in warning * Used `NewRelic event failed for malformed Hg push data` in place of `NewRelic event failed for malformed data` * Inserted f-string declaration and corrected typo in it too * Renamed `ResultsetFetchError` to `HgPushFetchError` * Renamed `resultset_fetch_failure` to `hg_push_fetch_failure` * Corrected typo from `fecht` to `fetch` Made the below change to test_push_loader.py * Replace `ResultsetFetchError` with `HgPushFetchError`
1 parent 1807b56 commit 9917c0d

File tree

2 files changed

+63
-1
lines changed

2 files changed

+63
-1
lines changed

tests/etl/test_push_loader.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from treeherder.etl.push_loader import (
99
GithubPullRequestTransformer,
1010
GithubPushTransformer,
11+
HgPushFetchError,
1112
HgPushTransformer,
1213
PulsePushError,
1314
PushLoader,
@@ -218,3 +219,18 @@ def test_ingest_github_push_comma_separated_branches(
218219
"https://firefox-ci-tc.services.mozilla.com",
219220
)
220221
assert Push.objects.count() == expected_pushes
222+
223+
224+
def test_fetch_push_raises_on_empty_pushes(monkeypatch):
225+
"""Test that a HgPushFetchError is raised when fetch_json returns a dict without 'pushes'"""
226+
monkeypatch.setattr("treeherder.etl.push_loader.fetch_json", lambda url: {})
227+
transformer = HgPushTransformer(
228+
{
229+
"payload": {
230+
"repo_url": "https://hg.mozilla.org/try",
231+
"pushlog_pushes": [{"push_full_json_url": "http://example"}],
232+
}
233+
}
234+
)
235+
with pytest.raises(HgPushFetchError):
236+
transformer.fetch_push("http://example", repository="try")

treeherder/etl/push_loader.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,47 @@ def fetch_push(self, url, repository, sha=None):
261261

262262
logger.debug("fetching for %s %s", repository, url)
263263
# there will only ever be one, with this url
264-
push = list(fetch_json(url)["pushes"].values())[0]
264+
try:
265+
data = fetch_json(url)
266+
except Exception as e:
267+
logger.exception(f"Failed fetching push JSON from {url} for {repository}: {e}")
268+
try:
269+
newrelic.agent.record_custom_event(
270+
"hg_push_fetch_failure",
271+
{"url": url, "repository": repository, "error": str(e)},
272+
)
273+
except Exception:
274+
# NewRelic failures should not block ingestion
275+
logger.debug("NewRelic event failed for fetch error")
276+
raise HgPushFetchError(f"Failed to fetch JSON from {url}: {e}") from e
277+
278+
pushes = None
279+
if isinstance(data, dict):
280+
pushes = data.get("pushes")
281+
282+
if not pushes or not isinstance(pushes, dict):
283+
# Log data in warning and raise error
284+
data_keys = list(data.keys()) if isinstance(data, dict) else None
285+
logger.warning(
286+
f"Malformed or empty push JSON from {url} for {repository}: data-keys={data_keys}"
287+
)
288+
try:
289+
newrelic.agent.record_custom_event(
290+
"hg_push_fetch_malformed",
291+
{"url": url, "repository": repository, "data_keys": data_keys},
292+
)
293+
except Exception:
294+
logger.debug("NewRelic event failed for malformed Hg push data")
295+
raise HgPushFetchError(f"Malformed or empty 'pushes' for {url}")
296+
297+
# Safely get the first push
298+
try:
299+
push = next(iter(pushes.values()))
300+
except Exception as e:
301+
logger.exception(
302+
f"Failed to extract first push from pushes for {repository} {url}: {e}"
303+
)
304+
raise HgPushFetchError(f"Unable to extract push from 'pushes' for {url}: {e}") from e
265305

266306
commits = []
267307
# we only want to ingest the last 200 commits for each push,
@@ -285,3 +325,9 @@ def fetch_push(self, url, repository, sha=None):
285325

286326
class PulsePushError(ValueError):
287327
pass
328+
329+
330+
class HgPushFetchError(Exception):
331+
"""Raised when fetching or parsing a push resultset fails."""
332+
333+
pass

0 commit comments

Comments
 (0)