Skip to content

Commit 958b9ea

Browse files
committed
code mostly ready
1 parent ed0183a commit 958b9ea

File tree

11 files changed

+682
-0
lines changed

11 files changed

+682
-0
lines changed

gitlab_submodule/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""List project submodules and get the commits they point to with python-gitlab
2+
"""
3+
4+
__version__ = '0.1.0'
5+
__all__ = [
6+
'GitmodulesSubmodule', 'ProjectSubmodule',
7+
'list_all_project_submodules',
8+
'read_gitlab_submodule',
9+
'iterate_gitlab_project_submodules', 'list_gitlab_project_submodules'
10+
]
11+
12+
from gitlab_submodule.objects import GitmodulesSubmodule, ProjectSubmodule
13+
from gitlab_submodule.gitlab_submodule import (
14+
list_all_project_submodules,
15+
read_gitlab_submodule,
16+
iterate_gitlab_project_submodules,
17+
list_gitlab_project_submodules
18+
)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from typing import List, Optional, Iterable
2+
3+
from gitlab.v4.objects import Project
4+
5+
from gitlab_submodule.objects import GitmodulesSubmodule, ProjectSubmodule
6+
from gitlab_submodule.read_gitmodules import list_project_submodules
7+
from gitlab_submodule.gitmodules_to_project import gitmodules_to_project
8+
from gitlab_submodule.submodule_commit import get_submodule_commit
9+
10+
11+
def list_all_project_submodules(
        project: Project,
        ref: Optional[str] = None) -> List[GitmodulesSubmodule]:
    """Return every submodule entry found for `project` at `ref`.

    This is a thin public entry point: both arguments are forwarded
    unchanged to `list_project_submodules`, whose result is returned as is.
    """
    submodules = list_project_submodules(project, ref)
    return submodules
15+
16+
17+
def read_gitlab_submodule(
        gitmodules_submodule: GitmodulesSubmodule,
        get_latest_commit_possible_if_not_found: bool = True,
        get_latest_commit_possible_ref: Optional[str] = None
) -> ProjectSubmodule:
    """Turn a .gitmodules entry into a fully resolved `ProjectSubmodule`.

    The entry is first mapped to its Gitlab project with
    `gitmodules_to_project`, then the commit it points to is looked up with
    `get_submodule_commit`. The two `get_latest_commit_possible_*` options
    are passed through untouched to that lookup.

    Returns:
        A `ProjectSubmodule` bundling the entry, its project, the commit and
        the flag telling whether that commit is exact.
    """
    project = gitmodules_to_project(gitmodules_submodule)
    commit, is_exact = get_submodule_commit(
        submodule=gitmodules_submodule,
        submodule_project=project,
        get_latest_commit_possible_if_not_found=(
            get_latest_commit_possible_if_not_found),
        get_latest_commit_possible_ref=get_latest_commit_possible_ref)
    return ProjectSubmodule(
        submodule=gitmodules_submodule,
        project=project,
        commit=commit,
        commit_is_exact=is_exact)
34+
35+
36+
def iterate_gitlab_project_submodules(
        project: Project,
        ref: Optional[str] = None,
        get_latest_commit_possible_if_not_found: bool = True,
        get_latest_commit_possible_ref: Optional[str] = None
) -> Iterable[ProjectSubmodule]:
    """Lazily resolve each submodule entry of `project` at `ref`.

    Entries come from `list_all_project_submodules`; each one is resolved
    with `read_gitlab_submodule`. An entry whose resolution raises
    `ValueError` is skipped silently; any other exception propagates to the
    caller.
    """
    entries = list_all_project_submodules(project, ref)
    for entry in entries:
        try:
            yield read_gitlab_submodule(
                entry,
                get_latest_commit_possible_if_not_found,
                get_latest_commit_possible_ref)
        except ValueError:
            # This entry cannot be resolved; move on to the next one.
            continue
52+
53+
54+
def list_gitlab_project_submodules(
        project: Project,
        ref: Optional[str] = None,
        get_latest_commit_possible_if_not_found: bool = True,
        get_latest_commit_possible_ref: Optional[str] = None
) -> List[ProjectSubmodule]:
    """Eager counterpart of `iterate_gitlab_project_submodules`.

    The signature mirrors the iterator's so that arguments can be given
    positionally as well as by keyword (the previous `**kwargs`-only form
    rejected `list_gitlab_project_submodules(project)`).

    Args:
        project: project whose submodules are resolved.
        ref: ref of `project` to inspect; forwarded as is.
        get_latest_commit_possible_if_not_found: forwarded as is.
        get_latest_commit_possible_ref: forwarded as is.

    Returns:
        All items produced by the iterator, collected in a list.
    """
    return list(iterate_gitlab_project_submodules(
        project,
        ref,
        get_latest_commit_possible_if_not_found,
        get_latest_commit_possible_ref))
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing import Optional
2+
3+
from posixpath import join, normpath
4+
from giturlparse import parse
5+
6+
from gitlab import Gitlab
7+
from gitlab.v4.objects import Project
8+
9+
from gitlab_submodule.objects import GitmodulesSubmodule
10+
11+
12+
def gitmodules_to_project(submodule: GitmodulesSubmodule,
                          gl: Optional[Gitlab] = None) -> Project:
    """Fetch the Gitlab project that a .gitmodules entry points to.

    Args:
        submodule: entry whose `url` (and `parent_project`, for relative
            urls) is converted to a project path with namespace.
        gl: client used to fetch the project. When omitted, a client is
            built with `Gitlab()` and no arguments, exactly as before; pass
            your own client to control host and authentication.

    Returns:
        The project returned by `gl.projects.get(<path with namespace>)`.

    Raises:
        ValueError: if the url cannot be mapped to a Gitlab project path.
    """
    path_with_namespace = _submodule_url_to_path_with_namespace(
        submodule.url, submodule.parent_project)
    if not path_with_namespace:
        raise ValueError(
            f'submodule at {submodule.url} is not hosted on Gitlab')
    if gl is None:
        gl = Gitlab()
    return gl.projects.get(path_with_namespace)
22+
23+
24+
def _submodule_url_to_path_with_namespace(
25+
url: str,
26+
parent_project: Project
27+
) -> Optional[str]:
28+
"""Returns a path pointing to a Gitlab project, or None if the submodule
29+
is hosted elsewhere
30+
"""
31+
try:
32+
parsed = parse(url)
33+
if parsed.platform != 'gitlab':
34+
return None
35+
if parsed.groups:
36+
to_join = [parsed.owner, join(*parsed.groups), parsed.repo]
37+
else:
38+
to_join = [parsed.owner, parsed.repo]
39+
path_with_namespace = join(*to_join)
40+
return path_with_namespace
41+
except Exception as e:
42+
print(e)
43+
# check if the submodule url is a relative path to the project path
44+
if url.startswith('./') or url.startswith('../'):
45+
# we build the path of the submodule project using the path of
46+
# the current project
47+
path_with_namespace = normpath(
48+
join(parent_project.path_with_namespace, url))
49+
return path_with_namespace
50+
return None

gitlab_submodule/objects.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
from gitlab.v4.objects import Project, ProjectCommit
2+
3+
4+
class GitmodulesSubmodule:
    """A submodule entry together with the project and ref it was read from.

    Besides plain attribute access, the five fields can be read with
    mapping-style access (`keys()` and `obj[key]`).
    """

    def __init__(self,
                 parent_project: Project,
                 parent_ref: str,
                 name: str,
                 path: str,
                 url: str):
        self.parent_project = parent_project
        self.parent_ref = parent_ref
        self.name = name
        self.path = path
        self.url = url

    def keys(self):
        """Return the set of field names readable through `self[key]`."""
        return {'parent_project', 'parent_ref', 'name', 'path', 'url'}

    def __getitem__(self, key):
        """Return the field named `key`; raise KeyError for other names."""
        if key not in self.keys():
            raise KeyError(key)
        return getattr(self, key)

    def __str__(self):
        """Render the fields, sorted by name, with strings single-quoted."""
        rendered = []
        for field in sorted(self.keys()):
            value = self[field]
            shown = f"'{value}'" if isinstance(value, str) else str(value)
            rendered.append(f"'{field}': {shown}")
        header = f"<class '{self.__class__.__name__}'>"
        return '{} => {{{}}}'.format(header, ', '.join(rendered))

    def __repr__(self):
        """Render as `ClassName (<parent repr>, 'ref', 'name', 'path', 'url')`."""
        texts = (self.parent_ref, self.name, self.path, self.url)
        quoted = ', '.join(f"'{text}'" for text in texts)
        return f'{self.__class__.__name__} ({self.parent_project!r}, {quoted})'
49+
50+
51+
def lstrip(string: str, pattern: str) -> str:
    """Drop one leading occurrence of `pattern` from `string`, if present.

    Unlike `str.lstrip`, `pattern` is matched as a whole prefix and is
    removed at most once; a string without that prefix is returned as is.
    """
    prefix_length = len(pattern)
    has_prefix = string[:prefix_length] == pattern
    return string[prefix_length:] if has_prefix else string
56+
57+
58+
class ProjectSubmodule:
    """A submodule entry bundled with its project and the commit it targets.

    Attributes of the three wrapped objects are reachable through prefixed
    names: `obj.project_x` reads/writes `obj.project.x`, and likewise for
    the `submodule_` and `commit_` prefixes. `commit_is_exact` is a plain
    attribute of this object and is never forwarded.
    """

    # Wrapped objects whose attributes are exposed as `<name>_<attribute>`.
    _DELEGATES = ('submodule', 'project', 'commit')
    # Names stored on the instance itself. `commit_is_exact` must be listed
    # because it would otherwise match the `commit_` prefix and end up being
    # written on the commit object as `is_exact`.
    _OWN_ATTRIBUTES = frozenset(
        {'submodule', 'project', 'commit', 'commit_is_exact'})

    def __init__(self,
                 submodule: GitmodulesSubmodule,
                 project: Project,
                 commit: ProjectCommit,
                 commit_is_exact: bool):
        self.submodule = submodule
        self.project = project
        self.commit = commit
        self.commit_is_exact = commit_is_exact

    def __getattr__(self, item: str):
        """Resolve `<delegate>_<name>` on the matching wrapped object.

        Only called when normal attribute lookup has already failed.

        Raises:
            AttributeError: if `item` has no delegate prefix, or if the
                wrapped object does not have the requested attribute.
        """
        for attribute in self._DELEGATES:
            prefix = f'{attribute}_'
            if item.startswith(prefix):
                return getattr(getattr(self, attribute), item[len(prefix):])

        raise AttributeError("'{}' object has no attribute '{}'".format(
            self.__class__.__name__, item))

    def __setattr__(self, key, value):
        """Store own attributes locally; forward prefixed names to delegates."""
        if key not in self._OWN_ATTRIBUTES:
            for attribute in self._DELEGATES:
                prefix = f'{attribute}_'
                if key.startswith(prefix):
                    return setattr(getattr(self, attribute),
                                   key[len(prefix):],
                                   value)

        try:
            super().__setattr__(key, value)
        except AttributeError:
            raise AttributeError("'{}' object has no attribute '{}'".format(
                self.__class__.__name__, key))

    def __str__(self):
        """Render the three wrapped objects, one per line, using `str`."""
        class_part = f"<class '{self.__class__.__name__}'>"
        attributes = [f"\n '{key}': {getattr(self, key)}"
                      for key in ['submodule', 'project', 'commit']]
        return class_part + ' => {' + ','.join(attributes) + '\n}'

    def __repr__(self):
        """Render the three wrapped objects, one per line, using `repr`."""
        return '{} (\n {},\n {},\n {}\n)'.format(
            self.__class__.__name__,
            repr(self.submodule),
            repr(self.project),
            repr(self.commit),
        )
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from typing import List, Optional, Iterable, Tuple
2+
3+
import re
4+
5+
from gitlab.v4.objects import Project
6+
7+
from gitlab_submodule.objects import GitmodulesSubmodule
8+
9+
10+
def list_project_submodules(
        project: Project,
        ref: Optional[str] = None) -> List[GitmodulesSubmodule]:
    """Collect the entries yielded by `_get_project_submodules` in a list.

    Both arguments are forwarded unchanged.
    """
    submodules = _get_project_submodules(project, ref)
    return [*submodules]
14+
15+
16+
def _get_project_submodules(
        project: Project,
        ref: Optional[str] = None) -> Iterable[GitmodulesSubmodule]:
    """Yield one `GitmodulesSubmodule` per entry of the .gitmodules file.

    Args:
        project: project whose .gitmodules file is read.
        ref: ref to read the file at; the project's default branch is
            recorded as `parent_ref` when `ref` is empty or None.

    Yields:
        The parsed entries, in the order returned by
        `_read_gitmodules_file_content`. Nothing is yielded when the file
        content is missing or empty.
    """
    gitmodules_file_content = _get_gitmodules_file_content(project, ref)
    if not gitmodules_file_content:
        # This is a generator: a bare return simply ends the iteration.
        return

    # Same for every entry, so it is computed once.
    parent_ref = ref if ref else project.default_branch
    for name, url, path in _read_gitmodules_file_content(
            gitmodules_file_content):
        yield GitmodulesSubmodule(
            parent_project=project,
            parent_ref=parent_ref,
            name=name,
            url=url,
            path=path)
30+
31+
32+
def _get_gitmodules_file_content(project: Project,
33+
ref: Optional[str] = None) -> Optional[str]:
34+
try:
35+
gitmodules = project.files.get(
36+
'.gitmodules',
37+
ref=ref if ref else project.default_branch)
38+
return gitmodules.decode().decode('utf-8')
39+
except Exception:
40+
return None
41+
42+
43+
def _read_gitmodules_file_content(
44+
gitmodules_file_content: str) -> List[Tuple[str, str, str]]:
45+
"""Some basic regex extractions to parse content of .gitmodules file
46+
"""
47+
name_regex = r'\[submodule "([a-zA-Z0-9\.\-/_]+)"\]'
48+
path_regex = r'path ?= ?([a-zA-Z0-9\.\-/_]+)'
49+
url_regex = r'url ?= ?([a-zA-Z0-9\.\-/_:@]+)'
50+
names = re.findall(name_regex, gitmodules_file_content)
51+
paths = re.findall(path_regex, gitmodules_file_content)
52+
urls = re.findall(url_regex, gitmodules_file_content)
53+
if not (len(names) == len(paths) == len(urls)):
54+
raise RuntimeError('Failed parsing the .gitmodules content')
55+
return list(zip(names, urls, paths))
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from typing import Optional, Tuple
2+
3+
import re
4+
5+
from gitlab.v4.objects import Project, ProjectCommit
6+
7+
from gitlab_submodule.objects import GitmodulesSubmodule
8+
9+
10+
def get_submodule_commit(
        submodule: GitmodulesSubmodule,
        submodule_project: Project,
        get_latest_commit_possible_if_not_found: bool = True,
        get_latest_commit_possible_ref: Optional[str] = None
) -> Tuple[ProjectCommit, bool]:
    """Fetch the commit of `submodule_project` that `submodule` points to.

    The commit id is determined by `_get_submodule_commit_id` from the
    submodule's parent project, path and parent ref; the commit object is
    then fetched from `submodule_project`.

    Returns:
        A `(commit, is_exact)` pair, where `is_exact` is the flag returned
        by `_get_submodule_commit_id` alongside the id.
    """
    commit_id, is_exact = _get_submodule_commit_id(
        project=submodule.parent_project,
        submodule_path=submodule.path,
        ref=submodule.parent_ref,
        submodule_project=submodule_project,
        get_latest_commit_possible_if_not_found=(
            get_latest_commit_possible_if_not_found),
        get_latest_commit_possible_ref=get_latest_commit_possible_ref)
    return submodule_project.commits.get(commit_id), is_exact
26+
27+
28+
def _get_submodule_commit_id(
29+
project: Project,
30+
submodule_path: str,
31+
ref: Optional[str] = None,
32+
submodule_project: Optional[Project] = None,
33+
get_latest_commit_possible_if_not_found: bool = True,
34+
get_latest_commit_possible_ref: Optional[str] = None
35+
) -> Tuple[str, bool]:
36+
"""This uses a trick:
37+
- The .gitmodules files doesn't contain the actual commit sha that the
38+
submodules points to.
39+
- Accessing the `<submodule_path>` dir via the ProjectFileManager
40+
doesn't bring any useful info, EXCEPT: the id of the last commit that
41+
modified the file (i.e. that updated the submodule commit sha)
42+
43+
=> We use that info to get the diff of the last commit that updated the
44+
submodule commit
45+
=> We parse the diff to get the new submodule commit sha
46+
47+
NOTE: in some weird cases I observed without really understanding,
48+
a commit which created a .gitmodules file can contain zero submodule
49+
commit sha in its entire diff.
50+
In that case, we can only try to guess which was the latest commit in
51+
the submodule project at the datetime of the commit.
52+
"""
53+
submodule_dir = project.files.get(
54+
submodule_path,
55+
ref=ref if ref else project.default_branch)
56+
last_commit_id = submodule_dir.last_commit_id
57+
update_submodule_commit = project.commits.get(last_commit_id)
58+
59+
submodule_commit_regex = r'Submodule commit ([a-zA-Z0-9]+)\n'
60+
for diff_file in update_submodule_commit.diff():
61+
if diff_file['new_path'] == submodule_path:
62+
# either the commit id was added for the first time,
63+
# or it was updated -> we can find one or two matches
64+
# (or 0 in these weird cases)
65+
matches = re.findall(submodule_commit_regex, diff_file['diff'])
66+
# submodule commit id was updated
67+
if len(matches) == 2:
68+
return matches[1], True
69+
# submodule was added
70+
if len(matches) == 1:
71+
return matches[0], True
72+
73+
# If the commit diff doesn't contain the submodule commit info, we still
74+
# know the date of the last commit in the project that updated the
75+
# submodule, so we can fallback to the last commit in the submodule that
76+
# was created before this date.
77+
# This requires a Project object for the submodule so if it wasn't
78+
# passed we cannot guess anything.
79+
if not get_latest_commit_possible_if_not_found:
80+
raise ValueError(
81+
f'Could not find commit id for submodule {submodule_path} of '
82+
f'project {project.path_with_namespace}.')
83+
else:
84+
last_subproject_commits = submodule_project.commits.list(
85+
ref_name=(get_latest_commit_possible_ref
86+
if get_latest_commit_possible_ref
87+
else submodule_project.default_branch),
88+
until=update_submodule_commit.created_at
89+
)
90+
return last_subproject_commits[0].id, False

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
python-gitlab==3.1.0
2+
giturlparse==0.10.0

0 commit comments

Comments
 (0)