11"""This module is an all-in-one parser and validator for Git URLs.
22
33- Detection: :meth:`GitURL.is_valid()`
4- - Parse: :class:`GitURL`
4+ - Parse:
55
66 compare to :class:`urllib.parse.ParseResult`
77
8- - Output ``git(1)`` URL: :meth:`GitURL.to_url()`
8+ - Compatibility focused: :class:`GitURL`: Will work with ``git(1)`` as well as
9+ ``pip(1)`` style URLs
10+
11+ - Output ``git(1)`` URL: :meth:`GitURL.to_url()`
12+ - Strict ``git(1)`` compatibility: :class:`GitBaseURL`.
13+
14+ - Output ``git(1)`` URL: :meth:`GitBaseURL.to_url()`
915- Extendable via :class:`~libvcs.parse.base.MatcherRegistry`,
1016 :class:`~libvcs.parse.base.Matcher`
1117"""
2329# We modified it to have groupings
2430SCP_REGEX = r"""
2531 # Optional user, e.g. 'git@'
26- (?P<user>( \w+))?@
32+ (( ?P<user>\w+)@)?
2733 # Server, e.g. 'github.com'.
2834 (?P<hostname>([^/:]+)):
2935 # The server-side path. e.g. 'user/project.git'. Must start with an
3339 """
3440
3541RE_PATH = r"""
42+ ((?P<user>\w+)@)?
3643 (?P<hostname>([^/:]+))
44+ (:(?P<port>\d{1,5}))?
3745 (?P<separator>[:,/])?
3846 (?P<path>
39- (\w[^:.]*) # cut the path at . to negate .git
47+ (\w[^:.@ ]*) # cut the path at . to negate .git, @ from pip
4048 )?
4149"""
4250
100108 )
101109"""
102110
103- RE_PIP_SCHEME_WITH_HTTP = r"""
111+ RE_PIP_SCP_SCHEME = r"""
104112 (?P<scheme>
105113 (
106114 git\+ssh|
107- git\+https|
108- git\+http|
109115 git\+file
110116 )
111117 )
112118"""
113119
120+ RE_PIP_REV = r"""
121+ (@(?P<rev>.*))
122+ """
123+
124+
114125PIP_DEFAULT_MATCHERS : list [Matcher ] = [
115126 Matcher (
116127 label = "pip-url" ,
117128 description = "pip-style git URL" ,
118129 pattern = re .compile (
119130 rf"""
120- { RE_PIP_SCHEME_WITH_HTTP }
131+ { RE_PIP_SCHEME }
121132 ://
122133 { RE_PATH }
123134 { RE_SUFFIX } ?
135+ { RE_PIP_REV } ?
124136 """ ,
125137 re .VERBOSE ,
126138 ),
130142 description = "pip-style git ssh/scp URL" ,
131143 pattern = re .compile (
132144 rf"""
133- { RE_PIP_SCHEME }
145+ { RE_PIP_SCP_SCHEME }
134146 { SCP_REGEX } ?
135- { RE_SUFFIX }
147+ { RE_SUFFIX } ?
148+ { RE_PIP_REV } ?
136149 """ ,
137150 re .VERBOSE ,
138151 ),
142155 label = "pip-file-url" ,
143156 description = "pip-style git+file:// URL" ,
144157 pattern = re .compile (
145- r """
158+ rf """
146159 (?P<scheme>git\+file)://
147- (?P<path>.*)
160+ (?P<path>[^@]*)
161+ { RE_PIP_REV } ?
148162 """ ,
149163 re .VERBOSE ,
150164 ),
193207
194208
195209@dataclasses .dataclass (repr = False )
196- class GitURL (URLProtocol , SkipDefaultFieldsReprMixin ):
210+ class GitBaseURL (URLProtocol , SkipDefaultFieldsReprMixin ):
197211 """Git repository location. Parses URLs on initialization.
198212
199213 Examples
@@ -216,9 +230,9 @@ class GitURL(URLProtocol, SkipDefaultFieldsReprMixin):
216230
217231 >>> GitURL(url='git@github.com:vcs-python/libvcs.git')
218232 GitURL(url=git@github.com:vcs-python/libvcs.git,
233+ user=git,
219234 hostname=github.com,
220235 path=vcs-python/libvcs,
221- user=git,
222236 suffix=.git,
223237 matcher=core-git-scp)
224238
@@ -229,28 +243,18 @@ class GitURL(URLProtocol, SkipDefaultFieldsReprMixin):
229243 ----------
230244 matcher : str
231245 name of the :class:`~libvcs.parse.base.Matcher`
232-
233- branch : str, optional
234- Default URL parsers don't output these,
235- can be added by extending or passing manually
236246 """
237247
238248 url : str
239249 scheme : Optional [str ] = None
250+ user : Optional [str ] = None
240251 hostname : Optional [str ] = None
252+ port : Optional [int ] = None
241253 path : Optional [str ] = None
242- user : Optional [str ] = None
243254
244255 # Decoration
245256 suffix : Optional [str ] = None
246257
247- #
248- # commit-ish: tag, branch, ref, revision
249- #
250- ref : Optional [str ] = None
251- branch : Optional [str ] = None
252- tag : Optional [str ] = None
253-
254258 matcher : Optional [str ] = None
255259 matchers = MatcherRegistry = MatcherRegistry (
256260 _matchers = {m .label : m for m in DEFAULT_MATCHERS }
@@ -298,9 +302,9 @@ def to_url(self) -> str:
298302
299303 >>> git_location
300304 GitURL(url=git@github.com:vcs-python/libvcs.git,
305+ user=git,
301306 hostname=github.com,
302307 path=vcs-python/libvcs,
303- user=git,
304308 suffix=.git,
305309 matcher=core-git-scp)
306310
@@ -333,3 +337,87 @@ def to_url(self) -> str:
333337 parts .append (self .suffix )
334338
335339 return "" .join (part for part in parts if isinstance (part , str ))
340+
341+
@dataclasses.dataclass(repr=False)
class GitPipURL(GitBaseURL, URLProtocol, SkipDefaultFieldsReprMixin):
    """Supports pip git URLs.

    Adds an optional revision on top of :class:`GitBaseURL`. When set,
    :meth:`to_url` appends it to the exported URL as ``@<rev>``.

    Attributes
    ----------
    rev : str, optional
        commit-ish the URL points at: tag, branch or ref
    """

    # commit-ish (rev): tag, branch, ref
    rev: Optional[str] = None

    # Kept unannotated so ``dataclasses`` treats it as a plain class attribute
    # rather than a field. Only ``matchers`` is bound here; the class body no
    # longer rebinds the ``MatcherRegistry`` name to an instance.
    matchers = MatcherRegistry(
        _matchers={m.label: m for m in PIP_DEFAULT_MATCHERS}
    )

    def to_url(self) -> str:
        """Exports a pip-compliant URL.

        The URL is built by the parent class's ``to_url()``; ``@<rev>`` is
        appended only when :attr:`rev` is set to a non-empty value.

        Examples
        --------

        >>> git_location = GitPipURL(
        ...     url='git+ssh://git@bitbucket.example.com:7999/PROJ/repo.git'
        ... )

        >>> git_location
        GitPipURL(url=git+ssh://git@bitbucket.example.com:7999/PROJ/repo.git,
                scheme=git+ssh,
                user=git,
                hostname=bitbucket.example.com,
                port=7999,
                path=PROJ/repo,
                suffix=.git,
                matcher=pip-url)

        >>> git_location.path = 'libvcs/vcspull'

        >>> git_location.to_url()
        'git+ssh://bitbucket.example.com/libvcs/vcspull.git'

        It also accepts revisions, e.g. branch, tag, ref:

        >>> git_location = GitPipURL(
        ...     url='git+https://github.com/vcs-python/libvcs.git@v0.10.0'
        ... )

        >>> git_location
        GitPipURL(url=git+https://github.com/vcs-python/libvcs.git@v0.10.0,
                scheme=git+https,
                hostname=github.com,
                path=vcs-python/libvcs,
                suffix=.git,
                matcher=pip-url,
                rev=v0.10.0)

        >>> git_location.path = 'libvcs/vcspull'

        >>> git_location.to_url()
        'git+https://github.com/libvcs/vcspull.git@v0.10.0'
        """
        url = super().to_url()

        # An unset (None) or empty revision leaves the base URL untouched.
        if self.rev:
            url = f"{url}@{self.rev}"

        return url
404+
405+
@dataclasses.dataclass(repr=False)
class GitURL(GitPipURL, GitBaseURL, URLProtocol, SkipDefaultFieldsReprMixin):
    """Batteries included URL Parser. Supports git(1) and pip URLs.

    **Ancestors (MRO)**
    This URL parser inherits methods and attributes from the following parsers:

    - :class:`GitPipURL`

      - :meth:`GitPipURL.to_url`
    - :class:`GitBaseURL`

      - :meth:`GitBaseURL.to_url`
    """

    # Registry keyed by matcher label, built from the core matchers followed by
    # the pip ones; on a duplicate label the later (pip) entry would win.
    # Kept unannotated so ``dataclasses`` treats it as a plain class attribute
    # rather than a field. Only ``matchers`` is bound here; the class body no
    # longer rebinds the ``MatcherRegistry`` name to an instance.
    matchers = MatcherRegistry(
        _matchers={m.label: m for m in [*DEFAULT_MATCHERS, *PIP_DEFAULT_MATCHERS]}
    )
0 commit comments