Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 52 additions & 24 deletions integration-tests/tests/fixtures/integration_test_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
IntegrationTestLogs,
IntegrationTestPathConfig,
)
from tests.utils.utils import unlink

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -48,7 +47,18 @@ def _download_and_extract_dataset(
integration_test_path_config: IntegrationTestPathConfig,
name: str,
tarball_url: str,
has_leading_directory_component: bool = True,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name is a bit awkward; how about

Suggested change
has_leading_directory_component: bool = True,
strip_leading_dir: bool = True,

or

Suggested change
has_leading_directory_component: bool = True,
strip_top_dir: bool = True,

) -> IntegrationTestLogs:
"""
:param request:
:param integration_test_path_config:
"param name:
:param tarball_url:
:param has_leading_directory_component: Whether all files inside the tarball are stored under a
single top level directory. Defaults to True.
:return: The IntegrationTestPathConfig object with its associated logs properly downloaded,
extracted, and permission changed to be overritable.
"""
Comment on lines +52 to +61
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Docstring is malformed and does not match the actual return type

The function docstring has a stray quote, a missing :param name: entry, a typo (“overritable”), and still claims to return IntegrationTestPathConfig instead of IntegrationTestLogs. This can mislead future readers.

-    """
-    :param request:
-    :param integration_test_path_config:
-    "param name:
-    :param tarball_url:
-    :param has_leading_directory_component: Whether all files inside the tarball are stored under a
-    single top level directory. Defaults to True.
-    :return: The IntegrationTestPathConfig object with its associated logs properly downloaded,
-    extracted, and permission changed to be overritable.
-    """
+    """
+    Download and extract an integration-test log dataset tarball.
+
+    :param request: The pytest fixture request, used for accessing the cache.
+    :param integration_test_path_config: Paths configuration for integration tests.
+    :param name: Dataset name (used for the cache key and file/directory naming).
+    :param tarball_url: URL of the dataset tarball.
+    :param has_leading_directory_component: Whether all files inside the tarball are stored under a
+        single top-level directory. Defaults to True.
+    :return: The IntegrationTestLogs describing the tarball and extraction directory after download.
+    """
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
"""
:param request:
:param integration_test_path_config:
"param name:
:param tarball_url:
:param has_leading_directory_component: Whether all files inside the tarball are stored under a
single top level directory. Defaults to True.
:return: The IntegrationTestPathConfig object with its associated logs properly downloaded,
extracted, and permission changed to be overritable.
"""
"""
Download and extract an integration-test log dataset tarball.
:param request: The pytest fixture request, used for accessing the cache.
:param integration_test_path_config: Paths configuration for integration tests.
:param name: Dataset name (used for the cache key and file/directory naming).
:param tarball_url: URL of the dataset tarball.
:param has_leading_directory_component: Whether all files inside the tarball are stored under a
single top-level directory. Defaults to True.
:return: The IntegrationTestLogs describing the tarball and extraction directory after download.
"""
🤖 Prompt for AI Agents
In integration-tests/tests/fixtures/integration_test_logs.py around lines 52 to
61, the function docstring is malformed (a stray quote, a missing :param name:
entry), contains a typo (“overritable”), and lists the wrong return type; update
the docstring to remove the stray quote, add a proper :param name: description
matching the function signature, fix the spelling to “overwritable”, and change
the :return: to correctly state and describe IntegrationTestLogs (and ensure all
param names and descriptions exactly match the function parameters).

Comment on lines +55 to +61
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"param name:
:param tarball_url:
:param has_leading_directory_component: Whether all files inside the tarball are stored under a
single top level directory. Defaults to True.
:return: The IntegrationTestPathConfig object with its associated logs properly downloaded,
extracted, and permission changed to be overritable.
"""
:param name:
:param tarball_url:
:param has_leading_directory_component: Whether all files inside the tarball are stored under a
single top-level directory.
:return: The IntegrationTestLogs object describing the donloaded and extracted logs.
"""

clarity and typo fixes.

integration_test_logs = IntegrationTestLogs(
name=name,
tarball_url=tarball_url,
Expand All @@ -58,38 +68,56 @@ def _download_and_extract_dataset(
logger.info("Test logs `%s` are up-to-date. Skipping download.", name)
return integration_test_logs

integration_test_logs.tarball_path.unlink(missing_ok=True)
shutil.rmtree(integration_test_logs.extraction_dir)
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)
Comment on lines +71 to +73
Copy link
Contributor

@coderabbitai coderabbitai bot Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

shutil.rmtree will raise on first run when the extraction directory does not exist

Calling shutil.rmtree(integration_test_logs.extraction_dir) unconditionally will raise a FileNotFoundError on the very first run (before the directory has ever been created), causing the fixture to fail before any download/extraction happens. The earlier helper-based cleanup was tolerant of missing paths.

-    integration_test_logs.tarball_path.unlink(missing_ok=True)
-    shutil.rmtree(integration_test_logs.extraction_dir)
-    integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)
+    integration_test_logs.tarball_path.unlink(missing_ok=True)
+    try:
+        shutil.rmtree(integration_test_logs.extraction_dir)
+    except FileNotFoundError:
+        pass
+    integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)

If you still want the previous “force remove with sudo” behaviour for stubborn paths, you could instead wire this back through tests.utils.utils.unlink, but the above is the minimal fix for the first-run failure.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
integration_test_logs.tarball_path.unlink(missing_ok=True)
shutil.rmtree(integration_test_logs.extraction_dir)
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)
integration_test_logs.tarball_path.unlink(missing_ok=True)
try:
shutil.rmtree(integration_test_logs.extraction_dir)
except FileNotFoundError:
pass
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)
🤖 Prompt for AI Agents
In integration-tests/tests/fixtures/integration_test_logs.py around lines 71 to
73, calling shutil.rmtree(integration_test_logs.extraction_dir) will raise
FileNotFoundError on first run; update the cleanup to tolerate missing
directories by either checking existence before removal or calling
shutil.rmtree(integration_test_logs.extraction_dir, ignore_errors=True) so the
fixture no longer fails when the extraction dir doesn't exist.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is correct, I did get an error when I ran this directly after a fresh build. The rabbit's committable suggestion works.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@quinntaylormitchell, thank you for confirming the issue and validating the fix! I appreciate you testing it on a fresh build. The try/except approach should ensure the fixture works reliably on first run while still cleaning up stale directories on subsequent runs.

   (\(\
   ( -.-)
   o_(")(")
   [Happy to help!]

🧠 Learnings used
Learnt from: Bill-hbrhbr
Repo: y-scope/clp PR: 1100
File: integration-tests/tests/fixtures/integration_test_logs.py:54-56
Timestamp: 2025-08-17T16:10:38.722Z
Learning: For PR #1100 (feat(integration-tests): Add CLP package integration tests boilerplate), do not raise cache weakness problems related to the pytest cache implementation in the integration test logs fixtures.

Learnt from: jackluo923
Repo: y-scope/clp PR: 1054
File: components/core/tools/scripts/lib_install/musllinux_1_2/install-prebuilt-packages.sh:27-32
Timestamp: 2025-07-07T17:41:15.655Z
Learning: In CLP installation scripts, consistency across platform scripts is prioritized over defensive programming improvements. For example, when extracting Task binaries with tar in `install-prebuilt-packages.sh`, the extraction pattern should remain consistent with other platform scripts rather than adding defensive flags like `--strip-components=1` to handle potential tarball layout changes.

Learnt from: Bill-hbrhbr
Repo: y-scope/clp PR: 1100
File: integration-tests/tests/utils/config.py:121-123
Timestamp: 2025-08-20T22:07:04.953Z
Learning: For the CLP integration tests codebase, do not suggest deriving tarball filenames from URLs instead of hard-coding ".tar.gz" extension. The user has explicitly rejected this suggestion.

Learnt from: Bill-hbrhbr
Repo: y-scope/clp PR: 1122
File: components/core/src/clp/clp/CMakeLists.txt:175-195
Timestamp: 2025-07-23T09:54:45.185Z
Learning: In the CLP project, when reviewing CMakeLists.txt changes that introduce new compression library dependencies (BZip2, LibLZMA, LZ4, ZLIB), the team prefers to address conditional linking improvements in separate PRs rather than expanding the scope of focused migration PRs like the LibArchive task-based installation migration.

Comment on lines +71 to +73
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A couple things about these three lines (in addition to the rabbit comment, which is correct):

  1. I remember that in tests.utils.utils, you define a function called unlink; you should use it. That being said, I've realized that function doesn't actually do what it says in the docstring. It says Remove a file or directory at path., but it only uses shutil.rmtree, which doesn't remove files (afaik). I think that we should do either one of two things: A. we should update that function to something like def remove_file_or_dir, make it do what the docstring says, and then use it everywhere in our integration tests, or B. we should remove that function entirely and use only a mixture of shutil.rmtree for directories and os.unlink for files.
  2. Why don't we make extraction_dir in the IntegrationTestLogs __post_init__?


tarball_path_str = str(integration_test_logs.tarball_path)
extract_path_str = str(integration_test_logs.extraction_dir)

curl_bin = shutil.which("curl")
if curl_bin is None:
err_msg = "curl executable not found"
raise RuntimeError(err_msg)

try:
# fmt: off
curl_cmds = [
curl_bin,
"--fail",
"--location",
"--output", str(integration_test_logs.tarball_path),
"--show-error",
tarball_url,
]
# fmt: on
subprocess.run(curl_cmds, check=True)

unlink(integration_test_logs.extraction_dir)
shutil.unpack_archive(
integration_test_logs.tarball_path, integration_test_logs.extraction_dir
)
except Exception as e:
err_msg = f"Failed to download and extract dataset `{name}`."
raise RuntimeError(err_msg) from e

# Allow the extracted content to be deletable or overwritable
# fmt: off
curl_cmd = [
curl_bin,
"--fail",
"--location",
"--output", tarball_path_str,
"--show-error",
tarball_url,
]
# fmt: on
subprocess.run(curl_cmd, check=True)

tar_bin = shutil.which("tar")
if tar_bin is None:
err_msg = "tar executable not found"
raise RuntimeError(err_msg)

# fmt: off
extract_cmd = [
tar_bin,
"--extract",
"--gzip",
"--file", tarball_path_str,
"-C", extract_path_str,
]
# fmt: on
if has_leading_directory_component:
extract_cmd.extend(["--strip-components", "1"])
subprocess.run(extract_cmd, check=True)

chmod_bin = shutil.which("chmod")
if chmod_bin is None:
err_msg = "chmod executable not found"
raise RuntimeError(err_msg)
subprocess.run([chmod_bin, "-R", "gu+w", integration_test_logs.extraction_dir], check=True)

# Allow the downloaded and extracted contents to be deletable or overwritable
subprocess.run([chmod_bin, "-R", "gu+w", tarball_path_str], check=True)
subprocess.run([chmod_bin, "-R", "gu+w", extract_path_str], check=True)

logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name)
request.config.cache.set(name, True)
Expand Down
Loading