
Commit 9f6826f

Bug 1387418: Handle empty push resultset from hg
This patch parses the hg push resultset and, when no pushes are present, logs the problem and raises an appropriate error instead of failing on the empty response.
1 parent af0d0f6 commit 9f6826f
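
For context, the pushlog JSON this patch guards against looks roughly like the sketch below. It is a minimal illustration in Python; the exact field names of hg.mozilla.org's json-pushes output are assumed here, not taken from this commit. The old code indexed ["pushes"] and took the first value directly, which fails on the empty cases.

# Typical resultset: pushes keyed by push id (assumed shape, for illustration only)
ok = {
    "pushes": {
        "12345": {
            "changesets": ["abcdef0123456789"],
            "date": 1502000000,
            "user": "someone@example.com",
        }
    }
}

# Cases the patched fetch_push now rejects with ResultsetFetchError
empty = {}                  # no "pushes" key at all
no_pushes = {"pushes": {}}  # "pushes" present but empty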

File tree

2 files changed: 61 additions & 1 deletion


tests/etl/test_push_loader.py

Lines changed: 16 additions & 0 deletions
@@ -11,6 +11,7 @@
     HgPushTransformer,
     PulsePushError,
     PushLoader,
+    ResultsetFetchError,
 )
 from treeherder.model.models import Push
 
@@ -218,3 +219,18 @@ def test_ingest_github_push_comma_separated_branches(
         "https://firefox-ci-tc.services.mozilla.com",
     )
     assert Push.objects.count() == expected_pushes
+
+
+def test_fetch_push_raises_on_empty_pushes(monkeypatch):
+    """Test that a ResultsetFetchError is raised when fetch_json returns a dict without 'pushes'."""
+    monkeypatch.setattr("treeherder.etl.push_loader.fetch_json", lambda url: {})
+    transformer = HgPushTransformer(
+        {
+            "payload": {
+                "repo_url": "https://hg.mozilla.org/try",
+                "pushlog_pushes": [{"push_full_json_url": "http://example"}],
+            }
+        }
+    )
+    with pytest.raises(ResultsetFetchError):
+        transformer.fetch_push("http://example", repository="try")
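
To exercise just this case locally, assuming the repository's usual pytest setup, something like:

pytest tests/etl/test_push_loader.py::test_fetch_push_raises_on_empty_pushes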

treeherder/etl/push_loader.py

Lines changed: 45 additions & 1 deletion
@@ -261,7 +261,45 @@ def fetch_push(self, url, repository, sha=None):
 
         logger.debug("fetching for %s %s", repository, url)
         # there will only ever be one, with this url
-        push = list(fetch_json(url)["pushes"].values())[0]
+        try:
+            data = fetch_json(url)
+        except Exception as e:
+            logger.exception("Failed fetching push JSON from %s for %s: %s", url, repository, e)
+            try:
+                newrelic.agent.record_custom_event(
+                    "resultset_fetch_failure",
+                    {"url": url, "repository": repository, "error": str(e)},
+                )
+            except Exception:
+                # NewRelic failures should not block ingestion
+                logger.debug("NewRelic event failed for fetch error")
+            raise ResultsetFetchError(f"Failed to fetch JSON from {url}: {e}") from e
+
+        pushes = None
+        if isinstance(data, dict):
+            pushes = data.get("pushes")
+
+        if not pushes or not isinstance(pushes, dict):
+            # Log debug info and raise an error
+            data_keys = list(data.keys()) if isinstance(data, dict) else None
+            logger.warning(
+                f"Malformed or empty push JSON from {url} for {repository}: data_keys={data_keys}"
+            )
+            try:
+                newrelic.agent.record_custom_event(
+                    "resultset_fetch_malformed",
+                    {"url": url, "repository": repository, "data_keys": data_keys},
+                )
+            except Exception:
+                logger.debug("NewRelic event failed for malformed data")
+            raise ResultsetFetchError(f"Malformed or empty 'pushes' for {url}")
+
+        # Safely get the first push
+        try:
+            push = next(iter(pushes.values()))
+        except Exception as e:
+            logger.exception("Failed to extract first push from pushes for %s %s: %s", repository, url, e)
+            raise ResultsetFetchError(f"Unable to extract push from 'pushes' for {url}: {e}") from e
 
         commits = []
         # we only want to ingest the last 200 commits for each push,
@@ -285,3 +323,9 @@ def fetch_push(self, url, repository, sha=None):
 
 class PulsePushError(ValueError):
     pass
+
+
+class ResultsetFetchError(Exception):
+    """Raised when fetching or parsing a push resultset fails."""
+
+    pass
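
With this change, fetch_push raises ResultsetFetchError rather than the KeyError/IndexError the old one-liner produced on an empty resultset, so callers can react deliberately. A hypothetical caller-side sketch, not part of this commit (push_url stands in for a pushlog_pushes entry's push_full_json_url):

# Hypothetical handling around fetch_push: skip one bad pushlog entry
# instead of failing the whole Pulse message.
try:
    transformer.fetch_push(push_url, repository="try")
except ResultsetFetchError as e:
    logger.warning("Skipping pushlog entry %s: %s", push_url, e)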
