Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions tests/etl/test_push_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from treeherder.etl.push_loader import (
GithubPullRequestTransformer,
GithubPushTransformer,
HgPushFetchError,
HgPushTransformer,
PulsePushError,
PushLoader,
Expand Down Expand Up @@ -218,3 +219,18 @@ def test_ingest_github_push_comma_separated_branches(
"https://firefox-ci-tc.services.mozilla.com",
)
assert Push.objects.count() == expected_pushes


def test_fetch_push_raises_on_empty_pushes(monkeypatch):
"""Test that a HgPushFetchError is raised when fetch_json returns a dict without 'pushes'"""
monkeypatch.setattr("treeherder.etl.push_loader.fetch_json", lambda url: {})
transformer = HgPushTransformer(
{
"payload": {
"repo_url": "https://hg.mozilla.org/try",
"pushlog_pushes": [{"push_full_json_url": "http://example"}],
}
}
)
with pytest.raises(HgPushFetchError):
transformer.fetch_push("http://example", repository="try")
48 changes: 47 additions & 1 deletion treeherder/etl/push_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,47 @@ def fetch_push(self, url, repository, sha=None):

logger.debug("fetching for %s %s", repository, url)
# there will only ever be one, with this url
push = list(fetch_json(url)["pushes"].values())[0]
try:
data = fetch_json(url)
except Exception as e:
logger.exception(f"Failed fetching push JSON from {url} for {repository}: {e}")
try:
newrelic.agent.record_custom_event(
"hg_push_fetch_failure",
{"url": url, "repository": repository, "error": str(e)},
)
except Exception:
# NewRelic failures should not block ingestion
logger.debug("NewRelic event failed for fetch error")
raise HgPushFetchError(f"Failed to fetch JSON from {url}: {e}") from e

pushes = None
if isinstance(data, dict):
pushes = data.get("pushes")

if not pushes or not isinstance(pushes, dict):
# Log data in warning and raise error
data_keys = list(data.keys()) if isinstance(data, dict) else None
logger.warning(
f"Malformed or empty push JSON from {url} for {repository}: data-keys={data_keys}"
)
try:
newrelic.agent.record_custom_event(
"hg_push_fetch_malformed",
{"url": url, "repository": repository, "data_keys": data_keys},
)
except Exception:
logger.debug("NewRelic event failed for malformed Hg push data")
raise HgPushFetchError(f"Malformed or empty 'pushes' for {url}")

# Safely get the first push
try:
push = next(iter(pushes.values()))
except Exception as e:
logger.exception(
f"Failed to extract first push from pushes for {repository} {url}: {e}"
)
raise HgPushFetchError(f"Unable to extract push from 'pushes' for {url}: {e}") from e

commits = []
# we only want to ingest the last 200 commits for each push,
Expand All @@ -285,3 +325,9 @@ def fetch_push(self, url, repository, sha=None):

class PulsePushError(ValueError):
pass


class HgPushFetchError(Exception):
"""Raised when fetching or parsing a push resultset fails."""

pass