285 changes: 285 additions & 0 deletions treeherder/push_health/tests.py
@@ -7,12 +7,15 @@
from django.core.cache import cache
from django.db.models import Q

from treeherder.model import error_summary
from treeherder.model.models import (
FailureLine,
InvestigatedTests,
Job,
JobLog,
JobType,
OptionCollection,
TextLogError,
)
from treeherder.push_health.filter import filter_failure
from treeherder.push_health.utils import (
@@ -84,6 +87,51 @@ def get_history(
# Note: get_current_test_failures was inlined into get_test_failures for clarity


def _get_failure_priority(action, status):
"""
Determine priority of a failure based on action and status.
Higher numbers = higher priority (more severe failures).

Priority order (highest to lowest):
1. crash action (process crashes)
2. CRASH status (test crashed)
3. TIMEOUT status
4. ASSERT status
5. ERROR status
6. FAIL status
7. log action (log errors)
8. Other statuses
"""
# Action-based priorities (higher = more severe)
action_priority = {
"crash": 1000, # Process crash - highest priority
"log": 100, # Log errors - medium priority
"test_result": 0, # Test results - priority based on status
"group_result": 0, # Group results - priority based on status
"truncated": 50, # Truncated logs - low-medium priority
}

# Status-based priorities (higher = more severe)
status_priority = {
"CRASH": 900, # Test crash
"TIMEOUT": 800, # Test timeout
"ASSERT": 700, # Assertion failure
"ERROR": 600, # Error
"FAIL": 500, # Regular failure
"PRECONDITION_FAILED": 400,
"NOTRUN": 300,
"SKIP": 200,
"OK": 100,
"PASS": 100,
}

# Combine action and status priorities
base_priority = action_priority.get(action, 0)
status_boost = status_priority.get(status, 0)

return base_priority + status_boost
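
# Illustration only (not part of this diff): with the tables above, a crash
# outranks a timeout, and both outrank a plain log-parsed error, e.g.:
#   _get_failure_priority("crash", "CRASH")          -> 1000 + 900 = 1900
#   _get_failure_priority("test_result", "TIMEOUT")  ->    0 + 800 =  800
#   _get_failure_priority("log", "ERROR")            ->  100 + 600 =  700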


def _generate_test_key(test_name, config, platform, job_name, job_group):
"""Generate unique key for test+platform+config+job combination."""
return re.sub(r"\W+", "", f"t{test_name}{config}{platform}{job_name}{job_group}")
@@ -128,6 +176,8 @@ def _create_test_failure_record(
investigated_test_id,
):
"""Create the test failure record dictionary with all required fields."""
priority = _get_failure_priority(failure_line.action, failure_line.status)

return {
"testName": test_name,
"action": failure_line.action.split("_")[0],
@@ -149,6 +199,7 @@
"isInvestigated": is_investigated,
"investigatedTestId": investigated_test_id,
"failedInJobs": [], # Track which jobs failed for this test
"_priority": priority, # Internal field to track failure severity
}


@@ -163,6 +214,29 @@ def has_line(failure_line, log_line_list):
)


def get_jobs_with_pending_parsing(job_ids):
"""
Check which jobs have log parsing still pending.

Args:
job_ids: List of job IDs to check

Returns:
set: Job IDs that have at least one JobLog with status=PENDING
"""
if not job_ids:
return set()

# Get all JobLog entries for these jobs that are still pending
pending_logs = (
JobLog.objects.filter(job_id__in=job_ids, status=JobLog.PENDING)
.values_list("job_id", flat=True)
.distinct()
)

return set(pending_logs)


def get_test_failure_jobs(push):
testfailed_jobs = (
Job.objects.filter(
@@ -304,6 +378,14 @@ def get_test_failures(
if not len(new_failure_jobs_dict):
return ("none", {"needInvestigation": [], "knownIssues": []})

# Check if there are any testfailed jobs with fcid=6
# This is important for cases where jobs failed but FailureLines haven't been parsed yet
has_testfailed_jobs = any(
job["result"] == "testfailed"
for jobs_list in new_failure_jobs_dict.values()
for job in jobs_list
)

# option_map is used to map platforms for the job.option_collection_hash
option_map = OptionCollection.objects.get_option_collection_map()
investigated_tests = InvestigatedTests.objects.filter(push=push)
@@ -372,6 +454,13 @@
is_investigated,
investigated_test_id,
)
else:
# Test key already exists - check if this failure has higher priority
current_priority = _get_failure_priority(failure_line.action, failure_line.status)
if current_priority > tests[test_key]["_priority"]:
# Update to use the higher priority action
tests[test_key]["action"] = failure_line.action.split("_")[0]
tests[test_key]["_priority"] = current_priority

# Track which jobs failed for this test (task-to-test matching)
job_id = job.id
@@ -383,6 +472,198 @@ def get_test_failures(
push_failures = sorted(tests.values(), key=lambda k: k["testName"])
filtered_push_failures = [failure for failure in push_failures if filter_failure(failure)]

# If there are testfailed jobs but no FailureLines, check parsing status before fallback
# This handles cases like timeout failures or jobs where parsing is still pending
if has_testfailed_jobs and not filtered_push_failures:
# Get job IDs that don't have FailureLines
jobs_without_failurelines = [
job_dict["id"]
for jobs_list in new_failure_jobs_dict.values()
for job_dict in jobs_list
if job_dict["result"] == "testfailed" and job_dict["id"] not in all_failed_jobs
]

# Initialize these variables for later use
jobs_pending_parsing = []
jobs_completed_parsing = []

if jobs_without_failurelines:
# Check which jobs have log parsing still pending
jobs_with_pending_parsing = get_jobs_with_pending_parsing(jobs_without_failurelines)

# Separate jobs into pending vs completed parsing
jobs_pending_parsing = [
jid for jid in jobs_without_failurelines if jid in jobs_with_pending_parsing
]
jobs_completed_parsing = [
jid for jid in jobs_without_failurelines if jid not in jobs_with_pending_parsing
]

if jobs_completed_parsing:
# Query TextLogError only for jobs that have completed parsing
text_log_errors = TextLogError.objects.filter(
job_id__in=jobs_completed_parsing
).select_related("job__job_type", "job__job_group", "job__machine_platform")

# Group by job to process each job's errors
errors_by_job = {}
for error in text_log_errors:
job_id = error.job_id
if job_id not in errors_by_job:
errors_by_job[job_id] = []
errors_by_job[job_id].append(error)

# Process each job's TextLogErrors
for job_id, errors in errors_by_job.items():
job = errors[0].job # Get the job object from the first error
config = clean_config(option_map[job.option_collection_hash])
platform = clean_platform(job.machine_platform.platform)
job_name = job.job_type.name
job_group = job.job_group.name
job.job_key = f"{config}{platform}{job_name}{job_group}"
all_failed_jobs[job.id] = job

# Process each error line to extract test information
for error in errors:
# Parse the error line to extract test name
clean_line = error_summary.get_cleaned_line(error.line)
search_info = error_summary.get_error_search_term_and_path(clean_line)

# Skip if we can't extract meaningful test information
if not search_info or not search_info.get("path_end"):
continue

test_name = search_info["path_end"]

# Generate unique key for this test+platform+config+job combination
test_key = _generate_test_key(test_name, config, platform, job_name, job_group)

# Check if test is investigated
is_investigated, investigated_test_id = _check_investigation_status(
test_name, job.job_type.id, investigated_tests
)

# Create test failure record if this is the first time seeing this test key
if test_key not in tests:
total_jobs_for_type = _count_total_jobs_for_type(job_name, all_jobs_dict)

# Create a minimal failure_line-like object for compatibility
class TextLogErrorAdapter:
def __init__(self):
self.action = "log"
self.status = "ERROR" # Default status for log errors

tests[test_key] = _create_test_failure_record(
test_name,
TextLogErrorAdapter(),
job,
config,
platform,
test_key,
total_jobs_for_type,
is_investigated,
investigated_test_id,
)
Collaborator:
Would this end up with a duplicate entry, assuming we have:
TEST-UNEXPECTED-FAIL | test_1.js | timed out
TEST-CRASH | | test_1.js

In the end we don't need both; I'll be curious where this ends up.

Collaborator Author:
Taking a look now...

Collaborator Author:
Looks like we'd just get whichever one comes first. So likely the timeout. Would it be better to show the crash instead?

Collaborator:
Heh, whatever has the most information. I think a crash is a better signal than a timeout the majority of the time. I guess we could replace (add/delete) if a crash comes in after the timeout?

Collaborator Author:
Sounds good. I made the change to use the crash rather than the timeout if both are present.
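
A minimal sketch of the resolution described above (hypothetical values, not taken from the PR): when a TIMEOUT is recorded first and a CRASH for the same test key arrives later, the higher combined score from _get_failure_priority wins and the record's action is replaced.

    # Hypothetical example: the same test key sees a timeout first, then a crash.
    tests = {}
    test_key = "ttest_1jsdebuglinux64mochitest"  # illustrative key only

    tests[test_key] = {
        "action": "test_result".split("_")[0],                         # "test"
        "_priority": _get_failure_priority("test_result", "TIMEOUT"),  # 0 + 800 = 800
    }

    current_priority = _get_failure_priority("crash", "CRASH")         # 1000 + 900 = 1900
    if current_priority > tests[test_key]["_priority"]:
        tests[test_key]["action"] = "crash".split("_")[0]              # crash replaces timeout
        tests[test_key]["_priority"] = current_priority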

else:
# Test key already exists - check if this failure has higher priority
# TextLogError entries have action="log" and status="ERROR"
current_priority = _get_failure_priority("log", "ERROR")
if current_priority > tests[test_key]["_priority"]:
# Update to use the higher priority action
tests[test_key]["action"] = "log"
tests[test_key]["_priority"] = current_priority

# Track which jobs failed for this test (task-to-test matching)
if job_id not in tests[test_key]["failedInJobs"]:
tests[test_key]["failedInJobs"].append(job_id)
tests[test_key]["totalFailures"] += 1

# Re-sort and filter after adding TextLogError-based failures
push_failures = sorted(tests.values(), key=lambda k: k["testName"])
filtered_push_failures = [
failure for failure in push_failures if filter_failure(failure)
]

# Create placeholder entries for jobs without failures
if not filtered_push_failures:
placeholder_failures = []

# Create entries for jobs with pending parsing
if jobs_pending_parsing:
for job_name, jobs_list in new_failure_jobs_dict.items():
for job_dict in jobs_list:
if (
job_dict["result"] == "testfailed"
and job_dict["id"] in jobs_pending_parsing
):
# Create a "parsing pending" entry for this job
placeholder_failures.append(
{
"testName": f"Log parsing pending for {job_name}",
"action": "test",
"jobName": job_dict["job_type_name"],
"jobSymbol": job_dict["job_type_symbol"],
"jobGroup": "",
"jobGroupSymbol": "",
"platform": job_dict["platform"],
"config": "",
"key": f"pending-{job_dict['id']}",
"jobKey": job_name,
"suggestedClassification": "Log Parsing Pending",
"confidence": 0,
"tier": job_dict.get("tier", 1),
"totalFailures": 1,
"totalJobs": 1,
"failedInParent": False,
"isClassifiedIntermittent": False,
"isInvestigated": False,
"investigatedTestId": None,
"failedInJobs": [job_dict["id"]],
}
)

# Create entries for jobs that completed parsing but have no failures
# (This is unusual but can happen with certain failure types)
if jobs_completed_parsing:
for job_name, jobs_list in new_failure_jobs_dict.items():
for job_dict in jobs_list:
if (
job_dict["result"] == "testfailed"
and job_dict["id"] in jobs_completed_parsing
and job_dict["id"] not in all_failed_jobs
):
# Create a placeholder for parsed jobs with no detectable failures
placeholder_failures.append(
{
"testName": f"No test failures detected for {job_name}",
"action": "test",
"jobName": job_dict["job_type_name"],
"jobSymbol": job_dict["job_type_symbol"],
"jobGroup": "",
"jobGroupSymbol": "",
"platform": job_dict["platform"],
"config": "",
"key": f"no-failures-{job_dict['id']}",
"jobKey": job_name,
"suggestedClassification": "Unknown Failure",
"confidence": 0,
"tier": job_dict.get("tier", 1),
"totalFailures": 1,
"totalJobs": 1,
"failedInParent": False,
"isClassifiedIntermittent": False,
"isInvestigated": False,
"investigatedTestId": None,
"failedInJobs": [job_dict["id"]],
}
)

filtered_push_failures = placeholder_failures

# Remove internal _priority field before returning results
for failure in filtered_push_failures:
failure.pop("_priority", None)

# SIMPLIFIED: All fcid=6 failures go to needInvestigation
# No historical classification needed since we're focused on new failures
failures = {
@@ -392,6 +673,10 @@ def get_test_failures(

if len(failures["needInvestigation"]):
result = "fail"
elif has_testfailed_jobs:
# If there are testfailed jobs with fcid=6 but no FailureLines yet,
# still mark as "fail" since these are new failures pending log parsing
result = "fail"
elif "unknown" in result_status:
result = "unknown"
