Skip to content

Commit 89d51e8

Browse files
authored
Merge pull request aspiers#127 from wetneb/1-blame-via-pygit
Blame via pygit2 instead of subprocess
2 parents caca4f6 + b0d5a8d commit 89d51e8

File tree

5 files changed

+122
-20
lines changed

5 files changed

+122
-20
lines changed

git_deps/blame.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import subprocess
2+
import re
3+
from dataclasses import dataclass
4+
5+
# The following classes are introduced to imitate their counterparts in pygit2,
6+
# so that the output of 'blame_via_subprocess' can be swapped with pygit2's own
7+
# blame output.
8+
9+
@dataclass
class GitRef:
    """
    Lightweight reference to a commit, identified by its hex SHA-1.

    Consumers read the commit id through the ``hex`` attribute.
    """
    # Commit id as a hexadecimal string.
    hex: str
15+
16+
@dataclass
class BlameHunk:
    """
    A run of consecutive blamed lines that all share the same commit
    information.
    """
    # Reference to the commit the lines of this hunk are attributed to.
    orig_commit_id: GitRef
    # Line number of the hunk's first line in the originating commit.
    orig_start_line_number: int
    # Line number of the hunk's first line in the blamed (final) file.
    final_start_line_number: int
    # Number of consecutive lines covered by the hunk.
    lines_in_hunk: int = 1
26+
27+
28+
def blame_via_subprocess(path, commit, start_line, num_lines):
    """
    Generate blame hunks by calling 'git blame' as a separate process.
    This is a workaround for the slowness of pygit2's own blame algorithm.
    See https://github.com/aspiers/git-deps/issues/1

    :param path: path of the file to blame (passed to git after '--')
    :param commit: revision at which the file is blamed
    :param start_line: first line of the range, as given to 'git blame -L'
    :param num_lines: number of lines in the range ('-L start,+num')
    :return: a generator yielding one BlameHunk per group of lines
        reported by 'git blame --porcelain', in output order
    :raises subprocess.CalledProcessError: if 'git blame' exits with a
        non-zero status
    """
    cmd = [
        'git', 'blame',
        '--porcelain',
        '-L', "%d,+%d" % (start_line, num_lines),
        commit, '--', path
    ]
    # The porcelain output embeds the raw content of every blamed line, and
    # that content is not guaranteed to be valid text in the locale encoding.
    # Only the ASCII header lines are parsed below, so undecodable bytes are
    # replaced instead of letting a UnicodeDecodeError abort the whole blame.
    output = subprocess.check_output(cmd, universal_newlines=True,
                                     errors='replace')

    # Only the header that starts a new group carries the fourth field (the
    # number of lines in the group), so requiring four fields selects exactly
    # one header per hunk.
    hunk_header = re.compile(r'^([0-9a-f]{40}) (\d+) (\d+) (\d+)$')

    for line in output.split('\n'):
        m = hunk_header.match(line)
        if not m:
            continue
        dependency_sha1, orig_line_num, line_num, length = m.group(1, 2, 3, 4)
        yield BlameHunk(
            orig_commit_id=GitRef(dependency_sha1),
            orig_start_line_number=int(orig_line_num),
            final_start_line_number=int(line_num),
            lines_in_hunk=int(length),
        )

git_deps/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def parse_args():
7575
'[%(default)s]')
7676
parser.add_argument('-d', '--debug', dest='debug', action='store_true',
7777
help='Show debugging')
78+
parser.add_argument('--pygit2-blame', dest='pygit2_blame', action='store_true',
79+
help="Use pygit2's blame algorithm (slower than git's)")
7880

7981
options, args = parser.parse_known_args()
8082

git_deps/detector.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from git_deps.gitutils import GitUtils
88
from git_deps.listener.base import DependencyListener
99
from git_deps.errors import InvalidCommitish
10+
from git_deps.blame import blame_via_subprocess
1011

1112

1213
class DependencyDetector(object):
@@ -172,24 +173,26 @@ def blame_diff_hunk(self, dependent, parent, path, hunk):
172173

173174
line_to_culprit = {}
174175

175-
for line in blame.split('\n'):
176-
self.process_hunk_line(dependent, dependent_sha1, parent,
177-
path, line, line_to_culprit)
176+
for blame_hunk in blame:
177+
self.process_blame_hunk(dependent, dependent_sha1, parent,
178+
path, blame_hunk, line_to_culprit)
178179

179180
self.debug_hunk(line_range_before, line_range_after, hunk,
180181
line_to_culprit)
181182

182-
def process_hunk_line(self, dependent, dependent_sha1, parent,
183-
path, line, line_to_culprit):
184-
self.logger.debug(" ! " + line.rstrip())
185-
m = re.match(r'^([0-9a-f]{40}) (\d+) (\d+)( \d+)?$', line)
186-
if not m:
187-
return
183+
def process_blame_hunk(self, dependent, dependent_sha1, parent,
184+
path, blame_hunk, line_to_culprit):
185+
186+
orig_line_num = blame_hunk.orig_start_line_number
187+
line_num = blame_hunk.final_start_line_number
188+
dependency_sha1 = blame_hunk.orig_commit_id.hex
189+
line_representation = f"{dependency_sha1} {orig_line_num} {line_num}"
190+
191+
self.logger.debug(f" ! {line_representation}")
188192

189-
dependency_sha1, orig_line_num, line_num = m.group(1, 2, 3)
190-
line_num = int(line_num)
191193
dependency = self.get_commit(dependency_sha1)
192-
line_to_culprit[line_num] = dependency.hex
194+
for i in range(blame_hunk.lines_in_hunk):
195+
line_to_culprit[line_num + i] = dependency.hex
193196

194197
if self.is_excluded(dependency):
195198
self.logger.debug(
@@ -206,7 +209,7 @@ def process_hunk_line(self, dependent, dependent_sha1, parent,
206209
self.record_dependency_source(parent,
207210
dependent, dependent_sha1,
208211
dependency, dependency_sha1,
209-
path, line_num, line)
212+
path, line_num, line_representation)
210213

211214
def debug_hunk(self, line_range_before, line_range_after, hunk,
212215
line_to_culprit):
@@ -234,13 +237,16 @@ def register_new_dependent(self, dependent, dependent_sha1):
234237
self.notify_listeners("new_dependent", dependent)
235238

236239
def run_blame(self, hunk, parent, path):
237-
cmd = [
238-
'git', 'blame',
239-
'--porcelain',
240-
'-L', "%d,+%d" % (hunk.old_start, hunk.old_lines),
241-
parent.hex, '--', path
242-
]
243-
return subprocess.check_output(cmd, universal_newlines=True)
240+
if self.options.pygit2_blame:
241+
return self.repo.blame(path,
242+
newest_commit=parent.hex,
243+
min_line=hunk.old_start,
244+
max_line=hunk.old_start + hunk.old_lines - 1)
245+
else:
246+
return blame_via_subprocess(path,
247+
parent.hex,
248+
hunk.old_start,
249+
hunk.old_lines)
244250

245251
def is_excluded(self, commit):
246252
if self.options.exclude_commits is not None:

tests/self_test.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,21 @@ echo "Running test suite"
1313
echo "* Dependencies of 4f27a1e, a regular commit"
1414
git-deps 4f27a1e^! | sort | diff tests/expected_outputs/deps_4f27a1e -
1515

16+
echo "* Same, but via pygit2's blame algorithm"
17+
git-deps --pygit2-blame 4f27a1e^! | sort | diff tests/expected_outputs/deps_4f27a1e -
18+
1619
echo "* Dependencies of 1ba7ad5, a merge commit"
1720
git-deps 1ba7ad5^! | sort | diff tests/expected_outputs/deps_1ba7ad5 -
1821

22+
echo "* Same, but via pygit2's blame algorithm"
23+
git-deps --pygit2-blame 1ba7ad5^! | sort | diff tests/expected_outputs/deps_1ba7ad5 -
24+
1925
echo "* Dependencies of the root commit"
2026
git-deps b196757^! | sort | diff tests/expected_outputs/deps_b196757 -
2127

28+
echo "* Same, but via pygit2's blame algorithm"
29+
git-deps --pygit2-blame b196757^! | sort | diff tests/expected_outputs/deps_b196757 -
30+
2231
echo "* Recursive dependencies of a4f27a1e, a regular commit"
2332
git-deps -r 4f27a1e^! | sort | diff tests/expected_outputs/recursive_deps_4f27a1e -
2433

tests/test_blame.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
from git_deps.blame import blame_via_subprocess, BlameHunk, GitRef
3+
4+
def test_blame_via_subprocess():
    """Blaming 4 lines of INSTALL.md from line 12 at a pinned commit
    must produce the two known hunks."""
    # (commit sha, original start line, final start line, lines in hunk)
    known_hunks = (
        ('6e23a48f888a355ad7e101c797ce1b66c4b7b86a', 12, 12, 2),
        ('2c9d23b0291157eb1096384ff76e0122747b9bdf', 10, 14, 2),
    )
    expected_hunks = [
        BlameHunk(
            GitRef(sha),
            orig_start_line_number=orig_start,
            final_start_line_number=final_start,
            lines_in_hunk=count)
        for sha, orig_start, final_start, count in known_hunks
    ]

    blame_output = blame_via_subprocess(
        'INSTALL.md',
        '04f5c095d4eccf5808db6dbf90c31a535f7f371c',
        12, 4)
    hunks = list(blame_output)

    assert hunks == expected_hunks
24+

0 commit comments

Comments
 (0)