diff --git a/tests/etl/test_push_loader.py b/tests/etl/test_push_loader.py index a6ce96df0ab..e51a19c5b5d 100644 --- a/tests/etl/test_push_loader.py +++ b/tests/etl/test_push_loader.py @@ -8,6 +8,7 @@ from treeherder.etl.push_loader import ( GithubPullRequestTransformer, GithubPushTransformer, + HgPushFetchError, HgPushTransformer, PulsePushError, PushLoader, @@ -218,3 +219,18 @@ def test_ingest_github_push_comma_separated_branches( "https://firefox-ci-tc.services.mozilla.com", ) assert Push.objects.count() == expected_pushes + + +def test_fetch_push_raises_on_empty_pushes(monkeypatch): + """Test that a HgPushFetchError is raised when fetch_json returns a dict without 'pushes'""" + monkeypatch.setattr("treeherder.etl.push_loader.fetch_json", lambda url: {}) + transformer = HgPushTransformer( + { + "payload": { + "repo_url": "https://hg.mozilla.org/try", + "pushlog_pushes": [{"push_full_json_url": "http://example"}], + } + } + ) + with pytest.raises(HgPushFetchError): + transformer.fetch_push("http://example", repository="try") diff --git a/treeherder/etl/push_loader.py b/treeherder/etl/push_loader.py index 05e3a67a5ae..168c73d08a0 100644 --- a/treeherder/etl/push_loader.py +++ b/treeherder/etl/push_loader.py @@ -261,7 +261,47 @@ def fetch_push(self, url, repository, sha=None): logger.debug("fetching for %s %s", repository, url) # there will only ever be one, with this url - push = list(fetch_json(url)["pushes"].values())[0] + try: + data = fetch_json(url) + except Exception as e: + logger.exception(f"Failed fetching push JSON from {url} for {repository}: {e}") + try: + newrelic.agent.record_custom_event( + "hg_push_fetch_failure", + {"url": url, "repository": repository, "error": str(e)}, + ) + except Exception: + # NewRelic failures should not block ingestion + logger.debug("NewRelic event failed for fetch error") + raise HgPushFetchError(f"Failed to fetch JSON from {url}: {e}") from e + + pushes = None + if isinstance(data, dict): + pushes = data.get("pushes") + + if not pushes or not isinstance(pushes, dict): + # Log data in warning and raise error + data_keys = list(data.keys()) if isinstance(data, dict) else None + logger.warning( + f"Malformed or empty push JSON from {url} for {repository}: data-keys={data_keys}" + ) + try: + newrelic.agent.record_custom_event( + "hg_push_fetch_malformed", + {"url": url, "repository": repository, "data_keys": data_keys}, + ) + except Exception: + logger.debug("NewRelic event failed for malformed Hg push data") + raise HgPushFetchError(f"Malformed or empty 'pushes' for {url}") + + # Safely get the first push + try: + push = next(iter(pushes.values())) + except Exception as e: + logger.exception( + f"Failed to extract first push from pushes for {repository} {url}: {e}" + ) + raise HgPushFetchError(f"Unable to extract push from 'pushes' for {url}: {e}") from e commits = [] # we only want to ingest the last 200 commits for each push, @@ -285,3 +325,9 @@ def fetch_push(self, url, repository, sha=None): class PulsePushError(ValueError): pass + + +class HgPushFetchError(Exception): + """Raised when fetching or parsing a push resultset fails.""" + + pass