Skip to content

Commit 5fd3a0e

Browse files
davidkopp and claude committed
Add Maven detector with performance optimizations
- Implement complete Maven detector for Java dependency resolution
- Support hybrid approach: Maven CLI with pom.xml parsing fallback
- Maven wrapper (./mvnw) takes priority over system Maven
- Performance optimizations: caching, batch mode (-B), simplified hashing
- Comprehensive test suite with Docker environments
- Complete technical documentation
- Filter test dependencies (exclude test scope)
- Handle property resolution in pom.xml parsing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 65a84a1 commit 5fd3a0e

File tree

7 files changed

+826
-0
lines changed

7 files changed

+826
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ python3 -m dependency_resolver -h
3434
- **apk** - System packages of Alpine
3535
- **pip** - Python packages
3636
- **npm** - Node.js packages
37+
- **maven** - Java packages
3738

3839
Also captures **Docker container metadata** when analyzing containers.
3940

dependency_resolver/core/orchestrator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ..detectors.dpkg_detector import DpkgDetector
66
from ..detectors.apk_detector import ApkDetector
77
from ..detectors.docker_info_detector import DockerInfoDetector
8+
from ..detectors.maven_detector import MavenDetector
89

910

1011
class Orchestrator:
@@ -24,6 +25,7 @@ def __init__(
2425
DockerInfoDetector(),
2526
DpkgDetector(),
2627
ApkDetector(),
28+
MavenDetector(debug=debug),
2729
PipDetector(venv_path=venv_path, debug=debug),
2830
NpmDetector(debug=debug),
2931
]
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
import hashlib
2+
import re
3+
import xml.etree.ElementTree as ET
4+
from typing import Optional, Any
5+
6+
from ..core.interfaces import EnvironmentExecutor, PackageManagerDetector
7+
8+
9+
class MavenDetector(PackageManagerDetector):
    """Detector for Maven-based Java projects.

    Dependencies are read from ``mvn dependency:list`` (preferring the project's
    ``./mvnw`` wrapper) when a Maven command runs successfully in the executor,
    and from a direct parse of ``pom.xml`` otherwise.
    """

    NAME = "maven"

    # Scopes kept from ``dependency:list`` output; ``test`` and anything else is dropped.
    _INCLUDED_SCOPES = ("compile", "runtime", "provided")
    # Every scope Maven defines; used to tell a scope field apart from a version field.
    _KNOWN_SCOPES = frozenset({"compile", "provided", "runtime", "test", "system", "import"})

    def __init__(self, debug: bool = False):
        """Create the detector.

        Args:
            debug: When true, failures are reported with ``print``.
        """
        self.debug = debug
        # NOTE(review): both caches are per instance and not keyed on working_dir,
        # so reusing one detector for several project directories returns the
        # result of the first probe. Confirm detectors are single-use.
        self._maven_available_cache: bool | None = None
        self._maven_command_cache: str | None = None

    def is_usable(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> bool:
        """Check if this is a Maven project by looking for pom.xml."""
        search_dir = working_dir or "."
        return executor.path_exists(f"{search_dir}/pom.xml")

    def get_dependencies(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> dict[str, Any]:
        """Extract Maven dependencies with versions.

        Returns:
            A dict with ``scope`` (always ``"project"``), ``location`` (absolute
            project path), ``dependencies`` (``"groupId:artifactId"`` mapped to
            ``{"version": ...}``) and, only when dependencies were found, ``hash``.

        Raises:
            RuntimeError: If the project directory cannot be resolved.
        """
        search_dir = working_dir or "."
        location = self._resolve_absolute_path(executor, search_dir)

        # Always project scope for Maven projects
        result: dict[str, Any] = {"scope": "project", "location": location}

        # Try Maven command first if available, otherwise parse pom.xml directly
        if self._maven_available(executor, working_dir):
            dependencies = self._get_dependencies_via_maven(executor, working_dir)
        else:
            dependencies = self._get_dependencies_via_pom_parsing(executor, search_dir)

        # Generate location-based hash if we have dependencies
        if dependencies:
            result["hash"] = self._generate_location_hash(executor, location)

        result["dependencies"] = dependencies
        return result

    def has_system_scope(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> bool:
        """Maven is always project scope."""
        return False

    def _maven_available(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> bool:
        """Check if Maven is available in the environment.

        The first call probes ``./mvnw --version`` (when the wrapper file exists)
        and then ``mvn --version``; the outcome and the working command are cached.
        """
        # Return cached result if available
        if self._maven_available_cache is not None:
            return self._maven_available_cache

        # Check for Maven wrapper first (project-specific)
        search_dir = working_dir or "."
        if executor.path_exists(f"{search_dir}/mvnw"):
            _, _, exit_code = executor.execute_command("./mvnw --version", working_dir)
            if exit_code == 0:
                self._maven_available_cache = True
                self._maven_command_cache = "./mvnw"
                return True

        # Fall back to system Maven
        _, _, exit_code = executor.execute_command("mvn --version", working_dir)
        result = exit_code == 0
        self._maven_available_cache = result
        if result:
            self._maven_command_cache = "mvn"
        return result

    def _get_maven_command(self, executor: EnvironmentExecutor, working_dir: Optional[str] = None) -> str:
        """Determine which Maven command to use (wrapper first, then system)."""
        # Return cached result if available (set by _maven_available)
        if self._maven_command_cache is not None:
            return self._maven_command_cache

        # If not cached, ensure Maven availability is checked first to populate cache
        self._maven_available(executor, working_dir)
        return self._maven_command_cache or "mvn"

    def _get_dependencies_via_maven(
        self, executor: EnvironmentExecutor, working_dir: Optional[str] = None
    ) -> dict[str, dict[str, str]]:
        """Extract direct dependencies using ``dependency:list``.

        Returns an empty dict when the Maven command exits non-zero.
        """
        maven_cmd = self._get_maven_command(executor, working_dir)

        stdout, stderr, exit_code = executor.execute_command(
            f"{maven_cmd} dependency:list -B -q -DoutputFile=/dev/stdout -DexcludeTransitive=true", working_dir
        )

        if exit_code != 0:
            if self.debug:
                print(f"ERROR: Maven dependency:list failed with exit code {exit_code}")
                print(f"ERROR: stderr: {stderr}")
            return {}

        return self._parse_dependency_list(stdout)

    def _parse_dependency_list(self, stdout: str) -> dict[str, dict[str, str]]:
        """Parse ``dependency:list`` output into ``{"groupId:artifactId": {"version": ...}}``.

        Coordinate lines are indented and have the form
        ``groupId:artifactId:type:version:scope`` or, with a classifier,
        ``groupId:artifactId:type:classifier:version:scope``.
        """
        dependencies: dict[str, dict[str, str]] = {}

        # Split the raw output: stripping it first would remove the indentation of
        # the first line, and the indentation is what marks a coordinate line.
        for raw_line in stdout.splitlines():
            line = raw_line.strip()
            if not line or line.startswith("[") or "The following files have been resolved:" in line:
                continue
            if not raw_line.startswith(" ") or ":" not in line:
                continue

            parts = [part.strip() for part in line.split(":")]
            if len(parts) < 5:
                continue

            group_id, artifact_id = parts[0], parts[1]
            version = parts[3]
            scope = self._leading_token(parts[4])

            # With a classifier the fifth field is the version, not a scope;
            # shift one field to the right in that case.
            if scope not in self._KNOWN_SCOPES and len(parts) >= 6:
                version = parts[4]
                scope = self._leading_token(parts[5])

            # Include compile, runtime, and provided scopes; exclude test
            if scope in self._INCLUDED_SCOPES:
                dependencies[f"{group_id}:{artifact_id}"] = {"version": version}

        return dependencies

    @staticmethod
    def _leading_token(field: str) -> str:
        """Return the text before the first whitespace or ANSI escape character.

        The scope field may be followed by extra text (for example module
        information or colour codes), which is discarded here.
        """
        return re.split(r"[\x1b\s]", field)[0].strip()

    @staticmethod
    def _element_text(element: Optional[ET.Element], default: str = "") -> str:
        """Return the stripped text of ``element``, or ``default`` if absent or empty."""
        if element is None or element.text is None:
            return default
        return element.text.strip() or default

    def _get_dependencies_via_pom_parsing(
        self, executor: EnvironmentExecutor, search_dir: str
    ) -> dict[str, dict[str, str]]:
        """Extract dependencies by parsing pom.xml directly.

        Only the top-level ``<dependencies>`` section is read. Test-scoped
        entries are skipped and a missing version is reported as ``"unknown"``.
        Returns an empty dict if the file cannot be read or parsed.
        """
        pom_path = f"{search_dir}/pom.xml"

        # Read pom.xml content through the executor so it works in any environment.
        # NOTE(review): a path containing a single quote breaks this quoting.
        stdout, stderr, exit_code = executor.execute_command(f"cat '{pom_path}'")
        if exit_code != 0:
            if self.debug:
                print(f"ERROR: Failed to read pom.xml: {stderr}")
            return {}

        try:
            root = ET.fromstring(stdout)
        except ET.ParseError as e:
            if self.debug:
                print(f"ERROR: Failed to parse pom.xml: {e}")
            return {}

        # Handle XML namespaces: tags look like "{uri}name" when a default namespace is set
        namespace = ""
        if root.tag.startswith("{"):
            namespace = root.tag[root.tag.find("{") : root.tag.find("}") + 1]

        dependencies: dict[str, dict[str, str]] = {}

        deps_element = root.find(f"{namespace}dependencies")
        if deps_element is None:
            return dependencies

        for dep in deps_element.findall(f"{namespace}dependency"):
            group_id_elem = dep.find(f"{namespace}groupId")
            artifact_id_elem = dep.find(f"{namespace}artifactId")
            if group_id_elem is None or artifact_id_elem is None:
                continue

            # Text is stripped so that values padded with whitespace/newlines in
            # the XML still compare and display correctly.
            scope = self._element_text(dep.find(f"{namespace}scope"), "compile")
            # Skip test-scoped dependencies by default
            if scope == "test":
                continue

            group_id = self._element_text(group_id_elem)
            artifact_id = self._element_text(artifact_id_elem)
            version = self._element_text(dep.find(f"{namespace}version"), "unknown")

            # Resolve property placeholders in version if possible
            resolved_version = self._resolve_version_properties(version, root, namespace)
            dependencies[f"{group_id}:{artifact_id}"] = {"version": resolved_version}

        return dependencies

    def _resolve_version_properties(self, version: str, root: ET.Element, namespace: str) -> str:
        """Attempt to resolve Maven property placeholders in version strings.

        Only a version that consists entirely of one ``${name}`` placeholder is
        resolved. Lookup order: the pom's ``<properties>`` section, then the
        built-ins ``project.version`` (falling back to the parent's version when
        the project declares none) and ``project.parent.version``. The original
        string is returned when nothing matches.
        """
        if not version.startswith("${") or not version.endswith("}"):
            return version

        prop_name = version[2:-1]

        # Look for property in properties section. Tags are compared directly so
        # that a property name never has to be valid path syntax.
        properties = root.find(f"{namespace}properties")
        if properties is not None:
            wanted_tag = f"{namespace}{prop_name}"
            for prop_elem in properties:
                if prop_elem.tag == wanted_tag:
                    value = self._element_text(prop_elem)
                    if value:
                        return value
                    break

        # Check for common built-in properties
        own_version = self._element_text(root.find(f"{namespace}version"))
        parent_version = self._element_text(root.find(f"{namespace}parent/{namespace}version"))
        if prop_name == "project.version" and (own_version or parent_version):
            return own_version or parent_version
        if prop_name == "project.parent.version" and parent_version:
            return parent_version

        # Return original if we can't resolve
        return version

    def _resolve_absolute_path(self, executor: EnvironmentExecutor, path: str) -> str:
        """Resolve absolute path within the executor's context.

        Raises:
            RuntimeError: If the ``pwd`` command fails or prints nothing.
        """
        if path == ".":
            stdout, stderr, exit_code = executor.execute_command("pwd")
            if exit_code == 0 and stdout.strip():
                return stdout.strip()
            raise RuntimeError(f"Failed to resolve current directory in executor context: {stderr}")

        stdout, stderr, exit_code = executor.execute_command(f"cd '{path}' && pwd")
        if exit_code == 0 and stdout.strip():
            return stdout.strip()
        raise RuntimeError(f"Failed to resolve path '{path}' in executor context: {stderr}")

    def _generate_location_hash(self, executor: EnvironmentExecutor, location: str) -> str:
        """Generate a hash based on the Maven project files.

        The SHA-256 is taken over a sorted listing of size and path for every
        ``pom.xml`` and ``*.properties`` file under ``location`` (skipping
        ``target`` and ``.m2``); file contents are not read. Returns an empty
        string if the listing command fails or prints nothing.
        """
        stdout, _, exit_code = executor.execute_command(
            f"cd '{location}' && find . "
            "-name 'target' -prune -o "
            "-name '.m2' -prune -o "
            "\\( -name 'pom.xml' -o -name '*.properties' \\) "
            "-type f -printf '%s %p\\n' | LC_COLLATE=C sort -n -k1,1 -k2,2"
        )

        if exit_code == 0 and stdout.strip():
            content = stdout.strip()
            return hashlib.sha256(content.encode()).hexdigest()

        if self.debug:
            print(f"ERROR: maven_detector hash generation command failed with exit code {exit_code}")
            print(f"ERROR: location: {location}")
        return ""

0 commit comments

Comments
 (0)