Skip to content

Commit 4c11e4b

Browse files
davidkoppclaude
andcommitted
Optimize pip detector venv path search with early returns and batch operations
- Refactor _find_venv_path for priority-based search with immediate validation - Add early returns for high-priority paths (explicit, VIRTUAL_ENV, pip location) - Combine local and external venv searches into single efficient batch operation - Fix circular dependency between _find_venv_path and _get_pip_command - Cache pip command to eliminate redundant path_exists calls - Resolve home directory explicitly to fix tilde expansion issues in containers - Remove /usr/local from system-wide fallback search to avoid false positives - Enhance test coverage with system distractor packages validation - Improve test clarity by renaming _install_system_packages to _create_system_distractor_packages - Replace mock-based pip show test with real integration test 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent ef77028 commit 4c11e4b

File tree

4 files changed

+239
-40
lines changed

4 files changed

+239
-40
lines changed

dependency_resolver/detectors/pip_detector.py

Lines changed: 84 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def __init__(self, venv_path: Optional[str] = None, debug: bool = False):
1919
self.debug = debug
2020
self._cached_venv_path: str | None = None
2121
self._venv_path_searched = False
22+
self._cached_pip_command: str | None = None
2223

2324
def is_usable(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> bool:
2425
"""Check if pip is usable in the environment."""
@@ -77,81 +78,125 @@ def get_dependencies(
7778
return packages, metadata
7879

7980
def _get_pip_location(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> str:
80-
"""Get the location of the pip environment."""
81+
"""Get the location of the pip environment, properly classifying system vs project scope."""
8182
pip_command = self._get_pip_command(executor, working_dir)
8283
stdout, _, exit_code = executor.execute_command(f"{pip_command} show pip", working_dir)
8384

8485
if exit_code == 0:
8586
for line in stdout.split("\n"):
8687
if line.startswith("Location:"):
87-
return line.split(":", 1)[1].strip()
88+
location = line.split(":", 1)[1].strip()
89+
90+
# Check if this is a system location
91+
if location.startswith(("/usr/lib", "/usr/local/lib")):
92+
return "system"
93+
94+
return location
8895

8996
return "system"
9097

9198
def _get_pip_command(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> str:
9299
"""Get the appropriate pip command, activating venv if available."""
100+
if hasattr(self, "_cached_pip_command") and self._cached_pip_command:
101+
return self._cached_pip_command
102+
93103
venv_path = self._find_venv_path(executor, working_dir)
94104
if venv_path:
95105
venv_pip = f"{venv_path}/bin/pip"
96106
if executor.path_exists(venv_pip):
107+
self._cached_pip_command = venv_pip
97108
return venv_pip
109+
110+
self._cached_pip_command = "pip"
98111
return "pip"
99112

113+
def _validate_venv_path(self, executor: EnvironmentExecutor, path: str) -> str | None:
114+
"""Check if a path contains pyvenv.cfg and return the venv path if valid."""
115+
pyvenv_cfg_path = f"{path}/pyvenv.cfg"
116+
if executor.path_exists(pyvenv_cfg_path):
117+
self._cached_venv_path = path
118+
self._venv_path_searched = True
119+
return path
120+
return None
121+
100122
def _find_venv_path(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> str | None:
101-
"""Find virtual environment using batch search for pyvenv.cfg files."""
123+
"""Find virtual environment using priority-based search for pyvenv.cfg files."""
102124
# Return cached result if already searched
103125
if self._venv_path_searched:
104126
return self._cached_venv_path
105127

106-
# Build search paths in priority order
107-
search_paths = []
108-
search_dir = working_dir or "."
109-
110-
# 1. Explicit venv path (highest priority)
128+
# 1. Explicit venv path (highest priority) - check immediately
111129
if self.explicit_venv_path:
112130
stdout, _, exit_code = executor.execute_command(f"echo {self.explicit_venv_path}")
113131
if exit_code == 0 and stdout.strip():
114-
search_paths.append(stdout.strip())
132+
venv_path = self._validate_venv_path(executor, stdout.strip())
133+
if venv_path:
134+
self._cached_venv_path = venv_path
135+
self._venv_path_searched = True
136+
return venv_path
115137

116-
# 2. VIRTUAL_ENV environment variable (Docker containers)
138+
# 2. VIRTUAL_ENV environment variable (Docker containers) - check immediately
117139
if not isinstance(executor, HostExecutor):
118140
stdout, _, exit_code = executor.execute_command("echo $VIRTUAL_ENV", working_dir)
119141
if exit_code == 0 and stdout.strip():
120-
search_paths.append(stdout.strip())
121-
122-
# 3. Local project directory and common venv names
123-
search_paths.extend(
124-
[
125-
search_dir,
126-
f"{search_dir}/venv",
127-
f"{search_dir}/.venv",
128-
f"{search_dir}/env",
129-
f"{search_dir}/.env",
130-
f"{search_dir}/virtualenv",
131-
]
132-
)
142+
venv_path = self._validate_venv_path(executor, stdout.strip())
143+
if venv_path:
144+
self._cached_venv_path = venv_path
145+
self._venv_path_searched = True
146+
return venv_path
147+
148+
# 3. Extract venv path from pip show pip location (fallback method) - check immediately
149+
# Use basic pip command to avoid circular dependency with _get_pip_command
150+
stdout, _, exit_code = executor.execute_command("pip show pip", working_dir)
151+
if exit_code == 0:
152+
for line in stdout.split("\n"):
153+
if line.startswith("Location:"):
154+
pip_location = line.split(":", 1)[1].strip()
155+
# Skip system locations
156+
if not pip_location.startswith(("/usr/lib", "/usr/local/lib")) and "/lib/python" in pip_location:
157+
venv_path = pip_location.split("/lib/python")[0]
158+
# Trust this path since pip is installed there - just cache and return
159+
self._cached_venv_path = venv_path
160+
self._venv_path_searched = True
161+
return venv_path
162+
break
163+
164+
# 4. Batch search for remaining venv locations
165+
# Collect all paths and use single find command for efficiency (fewer executor calls)
166+
search_paths = []
167+
common_venv_names = ["venv", ".venv", "env", ".env", "virtualenv"]
133168

134-
# 4. External venv locations (if working_dir specified)
169+
# Check working directory and its subdirectories (if working_dir specified)
170+
if working_dir:
171+
search_paths.append(working_dir) # working_dir itself
172+
for venv_name in common_venv_names:
173+
search_paths.append(f"{working_dir}/{venv_name}")
174+
175+
# Check home directory subdirectories (always check)
176+
# Resolve home directory explicitly to avoid tilde expansion issues
177+
home_stdout, _, home_exit_code = executor.execute_command("echo $HOME")
178+
if home_exit_code == 0 and home_stdout.strip():
179+
home_dir = home_stdout.strip()
180+
for venv_name in common_venv_names:
181+
search_paths.append(f"{home_dir}/{venv_name}")
182+
183+
# External venv locations (project-specific, only if working_dir specified)
135184
if working_dir:
136185
resolved_working_dir = self._resolve_absolute_path(executor, working_dir)
137186
project_name = os.path.basename(resolved_working_dir)
138187

139-
# Expand external paths
140-
external_locations = [
141-
f"~/.virtualenvs/{project_name}",
142-
f"~/.local/share/virtualenvs/{project_name}",
143-
f"~/.cache/pypoetry/virtualenvs/{project_name}",
144-
f"~/.pyenv/versions/{project_name}",
145-
]
146-
147-
for location in external_locations:
148-
stdout, _, exit_code = executor.execute_command(f"echo {location}")
149-
if exit_code == 0 and stdout.strip():
150-
search_paths.append(stdout.strip())
188+
# Use resolved home directory for external locations too
189+
if home_exit_code == 0 and home_stdout.strip():
190+
home_dir = home_stdout.strip()
191+
external_locations = [
192+
f"{home_dir}/.virtualenvs/{project_name}",
193+
f"{home_dir}/.local/share/virtualenvs/{project_name}",
194+
f"{home_dir}/.cache/pypoetry/virtualenvs/{project_name}",
195+
f"{home_dir}/.pyenv/versions/{project_name}",
196+
]
197+
search_paths.extend(external_locations)
151198

152-
# Single batch find command to locate pyvenv.cfg in all search paths
153199
if search_paths:
154-
# Escape paths and create find command
155200
escaped_paths = [f"'{path}'" for path in search_paths]
156201
find_cmd = f"find {' '.join(escaped_paths)} -maxdepth 1 -name 'pyvenv.cfg' -type f 2>/dev/null | head -1"
157202

@@ -169,7 +214,7 @@ def _find_venv_path(self, executor: EnvironmentExecutor, working_dir: Optional[s
169214
print("pip_detector performing system-wide pyvenv.cfg search in container environment")
170215

171216
stdout, _, exit_code = executor.execute_command(
172-
"find /opt /home /usr/local -name 'pyvenv.cfg' -type f 2>/dev/null | head -1"
217+
"find /opt /home -name 'pyvenv.cfg' -type f 2>/dev/null | head -1"
173218
)
174219

175220
if exit_code == 0 and stdout.strip():

tests/detectors/pip/test_pip_venv_detection.py

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ def test_virtualenvs_project_name_detection(self, request: pytest.FixtureRequest
163163
_, _, exit_code = executor.execute_command(f"mkdir -p {virtualenvs_dir}")
164164
assert exit_code == 0, f"Failed to create virtualenvs directory {virtualenvs_dir}"
165165

166+
# Create system distractor packages first (should NOT be detected)
167+
self._create_system_distractor_packages(executor)
168+
166169
self._create_venv(executor, venv_path)
167170
self._install_test_packages(executor, venv_path)
168171

@@ -177,7 +180,7 @@ def test_virtualenvs_project_name_detection(self, request: pytest.FixtureRequest
177180

178181
self._validate_venv_detection(packages, metadata, expected_scope="project")
179182

180-
# Verify it found the virtualenvs venv
183+
# Verify it found the specific virtualenvs venv path
181184
assert metadata["location"].startswith(
182185
venv_path
183186
), f"Expected location to start with {venv_path}, got {metadata['location']}"
@@ -198,6 +201,9 @@ def test_venv_located_at_opt_venv(self, request: pytest.FixtureRequest) -> None:
198201
container_id = self.start_container(self.PYTHON_DOCKER_IMAGE)
199202
executor = DockerExecutor(container_id)
200203

204+
# Create system distractor packages first (should NOT be detected)
205+
self._create_system_distractor_packages(executor)
206+
201207
# Create a virtual environment at /opt/venv
202208
venv_path = "/opt/venv"
203209
self._create_venv(executor, venv_path)
@@ -225,6 +231,58 @@ def test_venv_located_at_opt_venv(self, request: pytest.FixtureRequest) -> None:
225231
if container_id:
226232
self.cleanup_container(container_id)
227233

234+
@pytest.mark.skipif(docker is None, reason="Docker not available")
235+
def test_pip_show_pip_venv_detection(self, request: pytest.FixtureRequest) -> None:
236+
"""Test venv detection using pip show pip fallback method with activated venv."""
237+
verbose_output = self.setup_verbose_output(request)
238+
container_id = None
239+
240+
try:
241+
container_id = self.start_container(self.PYTHON_DOCKER_IMAGE)
242+
executor = DockerExecutor(container_id)
243+
244+
# Create system distractor packages first (should NOT be detected)
245+
self._create_system_distractor_packages(executor)
246+
247+
# Create a venv in an unusual location that won't be found by standard searches
248+
unusual_venv_path = "/unusual/location/myvenv"
249+
_, _, exit_code = executor.execute_command(f"mkdir -p {os.path.dirname(unusual_venv_path)}")
250+
assert exit_code == 0, "Failed to create parent directory for unusual venv path"
251+
252+
self._create_venv(executor, unusual_venv_path)
253+
self._install_test_packages(executor, unusual_venv_path)
254+
255+
# Activate the venv by using its pip directly (simulates activated environment)
256+
venv_pip = f"{unusual_venv_path}/bin/pip"
257+
_, _, exit_code = executor.execute_command(f"{venv_pip} --version")
258+
assert exit_code == 0, "Venv pip should be usable"
259+
260+
# Override PATH to make the venv pip the default (simulates activation)
261+
_, _, exit_code = executor.execute_command(f"ln -sf {venv_pip} /usr/local/bin/pip")
262+
assert exit_code == 0, "Failed to make venv pip the default"
263+
264+
self.wait_for_container_ready(container_id, "pip --version", max_wait=60)
265+
266+
# Test detector - should find venv via pip show pip fallback method
267+
detector = PipDetector(debug=True)
268+
packages, metadata = detector.get_dependencies(executor)
269+
270+
if verbose_output:
271+
self.print_verbose_results("PIP SHOW PIP DETECTION:", {"packages": packages, "metadata": metadata})
272+
273+
self._validate_venv_detection(packages, metadata, expected_scope="project")
274+
275+
# Verify it found the unusual venv location via pip show pip
276+
assert metadata["location"].startswith(
277+
unusual_venv_path
278+
), f"Expected location to start with {unusual_venv_path}, got {metadata['location']}"
279+
280+
print(f"✓ Successfully detected venv via pip show pip fallback: {unusual_venv_path}")
281+
282+
finally:
283+
if container_id:
284+
self.cleanup_container(container_id)
285+
228286
@pytest.mark.skipif(docker is None, reason="Docker not available")
229287
def test_no_venv_found_returns_empty(self, request: pytest.FixtureRequest) -> None:
230288
"""Test that when no venv is found in working_dir, empty dependencies are returned."""
@@ -282,6 +340,48 @@ def _install_test_packages(self, executor: DockerExecutor, venv_path: str) -> No
282340
if exit_code != 0:
283341
pytest.fail(f"Failed to install {package} in venv: {stderr}")
284342

343+
def _create_system_distractor_packages(self, executor: DockerExecutor) -> None:
344+
"""Create fake system packages in /usr/local that should NOT be detected by venv detection.
345+
346+
This method creates decoy packages to ensure the detector correctly identifies
347+
venv packages instead of falling back to system-wide installations.
348+
"""
349+
# Create fake system python site-packages structure in /usr/local
350+
system_site_packages = "/usr/local/lib/python3.9/site-packages"
351+
_, _, exit_code = executor.execute_command(f"mkdir -p {system_site_packages}")
352+
if exit_code != 0:
353+
pytest.fail(f"Failed to create system site-packages directory: {system_site_packages}")
354+
355+
# Create fake system packages with different names (these should NOT be detected)
356+
distractor_packages = ["numpy==1.24.0", "pandas==1.5.3"]
357+
for package in distractor_packages:
358+
package_name = package.split("==", maxsplit=1)[0]
359+
package_version = package.split("==", maxsplit=1)[1]
360+
361+
# Create package metadata files that pip would create
362+
dist_info_dir = f"{system_site_packages}/{package_name}-{package_version}.dist-info"
363+
_, _, exit_code = executor.execute_command(f"mkdir -p {dist_info_dir}")
364+
if exit_code != 0:
365+
pytest.fail(f"Failed to create dist-info directory: {dist_info_dir}")
366+
367+
# Create METADATA file
368+
metadata_content = f"""Name: {package_name}
369+
Version: {package_version}
370+
"""
371+
_, _, exit_code = executor.execute_command(f"echo '{metadata_content}' > {dist_info_dir}/METADATA")
372+
if exit_code != 0:
373+
pytest.fail(f"Failed to create METADATA file for {package_name}")
374+
375+
# Also create a fake pyvenv.cfg in /usr/local (should NOT be detected due to /usr/local exclusion)
376+
fake_venv_cfg = "/usr/local/pyvenv.cfg"
377+
cfg_content = """home = /usr/bin
378+
include-system-site-packages = false
379+
version = 3.9.0
380+
"""
381+
_, _, exit_code = executor.execute_command(f"echo '{cfg_content}' > {fake_venv_cfg}")
382+
if exit_code != 0:
383+
pytest.fail(f"Failed to create fake pyvenv.cfg at {fake_venv_cfg}")
384+
285385
def _validate_venv_detection(
286386
self, packages: list[Dict[str, Any]], metadata: Dict[str, Any], expected_scope: str
287387
) -> None:
@@ -294,6 +394,20 @@ def _validate_venv_detection(
294394
assert isinstance(metadata["location"], str), f"Location should be string, got {type(metadata['location'])}"
295395
assert len(metadata["location"]) > 0, "Location should not be empty"
296396
assert "hash" in metadata, "Project scope should include hash"
397+
398+
# Verify distractor packages are NOT detected
399+
package_names = [pkg["name"] for pkg in packages]
400+
distractor_package_names = ["numpy", "pandas"]
401+
for distractor_pkg in distractor_package_names:
402+
assert (
403+
distractor_pkg not in package_names
404+
), f"Distractor package {distractor_pkg} was incorrectly detected in venv packages"
405+
406+
# Verify expected venv packages ARE detected
407+
expected_venv_packages = ["requests", "click"]
408+
for venv_pkg in expected_venv_packages:
409+
assert venv_pkg in package_names, f"Expected venv package {venv_pkg} not found in packages"
410+
297411
else:
298412
# System scope should have empty metadata
299413
assert not metadata, f"System scope should have empty metadata, got {metadata}"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
FROM greencoding/gcb_playwright:v20
2+
3+
RUN cd /root && \
4+
python3 -m virtualenv venv && \
5+
/root/venv/bin/pip install psutils
6+
7+
CMD ["tail", "-f", "/dev/null"]
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
3+
# Build Playwright container and verify psutils is detected in /root/venv/ by dependency_resolver
4+
5+
set -e
6+
7+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
8+
DOCKERFILE_PATH="$SCRIPT_DIR"
9+
DEPENDENCY_RESOLVER_PATH="$SCRIPT_DIR/../../.."
10+
CONTAINER_NAME="test-playwright-$(date +%s)"
11+
12+
echo "Building Docker image from $DOCKERFILE_PATH..."
13+
docker build -t "$CONTAINER_NAME" -f "$DOCKERFILE_PATH/Dockerfile" "$DOCKERFILE_PATH"
14+
15+
echo "Starting container..."
16+
docker run --rm -d --name "$CONTAINER_NAME" "$CONTAINER_NAME"
17+
18+
echo "Running dependency_resolver on container $CONTAINER_NAME..."
19+
cd "$DEPENDENCY_RESOLVER_PATH"
20+
source venv/bin/activate
21+
22+
echo "Checking for psutils in output..."
23+
if python3 -m dependency_resolver docker "$CONTAINER_NAME" | grep -q "psutils"; then
24+
echo "✓ psutils found in dependency output"
25+
else
26+
echo "✗ psutils NOT found in dependency output"
27+
fi
28+
29+
echo "Cleaning up..."
30+
docker stop "$CONTAINER_NAME"
31+
docker rmi "$CONTAINER_NAME"
32+
33+
echo "Test complete!"

0 commit comments

Comments
 (0)