Skip to content

Commit 9c2eaa7

Browse files
authored
Merge pull request #341 from vmarkovtsev/master
Add exact signature id matching
2 parents bd565d1 + 464dc7f commit 9c2eaa7

File tree

3 files changed

+113
-48
lines changed

3 files changed

+113
-48
lines changed

internal/plumbing/identity/identity.go

Lines changed: 67 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ type Detector struct {
2121
PeopleDict map[string]int
2222
// ReversedPeopleDict maps developer id -> description
2323
ReversedPeopleDict []string
24+
// ExactSignatures chooses the matching algorithm: opportunistic email || name
25+
// or exact email && name
26+
ExactSignatures bool
2427

2528
l core.Logger
2629
}
@@ -43,6 +46,10 @@ const (
4346
// ConfigIdentityDetectorPeopleDictPath is the name of the configuration option
4447
// (Detector.Configure()) which allows setting the external PeopleDict mapping from a file.
4548
ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"
49+
// ConfigIdentityDetectorExactSignatures is the name of the configuration option
50+
// (Detector.Configure()) which changes the matching algorithm to exact signature (name + email)
51+
// correspondence.
52+
ConfigIdentityDetectorExactSignatures = "IdentityDetector.ExactSignatures"
4653
// FactIdentityDetectorPeopleCount is the name of the fact which is inserted in
4754
// Detector.Configure(). It is equal to the overall number of unique authors
4855
// (the length of ReversedPeopleDict).
@@ -78,7 +85,13 @@ func (detector *Detector) ListConfigurationOptions() []core.ConfigurationOption
7885
Description: "Path to the file with developer -> name|email associations.",
7986
Flag: "people-dict",
8087
Type: core.PathConfigurationOption,
81-
Default: ""},
88+
Default: ""}, {
89+
Name: ConfigIdentityDetectorExactSignatures,
90+
Description: "Disable separate name/email matching. This will lead to considerbly more " +
91+
"identities and should not be normally used.",
92+
Flag: "exact-signatures",
93+
Type: core.BoolConfigurationOption,
94+
Default: false},
8295
}
8396
return options[:]
8497
}
@@ -96,6 +109,9 @@ func (detector *Detector) Configure(facts map[string]interface{}) error {
96109
if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
97110
detector.ReversedPeopleDict = val
98111
}
112+
if val, exists := facts[ConfigIdentityDetectorExactSignatures].(bool); exists {
113+
detector.ExactSignatures = val
114+
}
99115
if detector.PeopleDict == nil || detector.ReversedPeopleDict == nil {
100116
peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
101117
if peopleDictPath != "" {
@@ -133,13 +149,19 @@ func (detector *Detector) Initialize(repository *git.Repository) error {
133149
// in Provides(). If there was an error, nil is returned.
134150
func (detector *Detector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
135151
commit := deps[core.DependencyCommit].(*object.Commit)
152+
var authorID int
153+
var exists bool
136154
signature := commit.Author
137-
authorID, exists := detector.PeopleDict[strings.ToLower(signature.Email)]
138-
if !exists {
139-
authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)]
155+
if !detector.ExactSignatures {
156+
authorID, exists = detector.PeopleDict[strings.ToLower(signature.Email)]
140157
if !exists {
141-
authorID = AuthorMissing
158+
authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)]
142159
}
160+
} else {
161+
authorID, exists = detector.PeopleDict[strings.ToLower(signature.String())]
162+
}
163+
if !exists {
164+
authorID = AuthorMissing
143165
}
144166
return map[string]interface{}{DependencyAuthor: authorID}, nil
145167
}
@@ -184,7 +206,8 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
184206
size := 0
185207

186208
mailmapFile, err := commits[len(commits)-1].File(".mailmap")
187-
if err == nil {
209+
// TODO(vmarkovtsev): properly handle .mailmap if ExactSignatures
210+
if !detector.ExactSignatures && err == nil {
188211
mailMapContents, err := mailmapFile.Contents()
189212
if err == nil {
190213
mailmap := ParseMailmap(mailMapContents)
@@ -239,34 +262,48 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
239262
}
240263

241264
for _, commit := range commits {
242-
email := strings.ToLower(commit.Author.Email)
243-
name := strings.ToLower(commit.Author.Name)
244-
id, exists := dict[email]
245-
if exists {
246-
_, exists := dict[name]
247-
if !exists {
248-
dict[name] = id
249-
names[id] = append(names[id], name)
265+
if !detector.ExactSignatures {
266+
email := strings.ToLower(commit.Author.Email)
267+
name := strings.ToLower(commit.Author.Name)
268+
id, exists := dict[email]
269+
if exists {
270+
_, exists := dict[name]
271+
if !exists {
272+
dict[name] = id
273+
names[id] = append(names[id], name)
274+
}
275+
continue
276+
}
277+
id, exists = dict[name]
278+
if exists {
279+
dict[email] = id
280+
emails[id] = append(emails[id], email)
281+
continue
282+
}
283+
dict[email] = size
284+
dict[name] = size
285+
emails[size] = append(emails[size], email)
286+
names[size] = append(names[size], name)
287+
size++
288+
} else { // detector.ExactSignatures
289+
sig := strings.ToLower(commit.Author.String())
290+
if _, exists := dict[sig]; !exists {
291+
dict[sig] = size
292+
size++
250293
}
251-
continue
252-
}
253-
id, exists = dict[name]
254-
if exists {
255-
dict[email] = id
256-
emails[id] = append(emails[id], email)
257-
continue
258294
}
259-
dict[email] = size
260-
dict[name] = size
261-
emails[size] = append(emails[size], email)
262-
names[size] = append(names[size], name)
263-
size++
264295
}
265296
reverseDict := make([]string, size)
266-
for _, val := range dict {
267-
sort.Strings(names[val])
268-
sort.Strings(emails[val])
269-
reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
297+
if !detector.ExactSignatures {
298+
for _, val := range dict {
299+
sort.Strings(names[val])
300+
sort.Strings(emails[val])
301+
reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
302+
}
303+
} else {
304+
for key, val := range dict {
305+
reverseDict[val] = key
306+
}
270307
}
271308
detector.PeopleDict = dict
272309
detector.ReversedPeopleDict = reverseDict

internal/plumbing/identity/identity_test.go

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ func TestIdentityDetectorMeta(t *testing.T) {
3939
assert.Equal(t, len(id.Provides()), 1)
4040
assert.Equal(t, id.Provides()[0], DependencyAuthor)
4141
opts := id.ListConfigurationOptions()
42-
assert.Len(t, opts, 1)
42+
assert.Len(t, opts, 2)
4343
assert.Equal(t, opts[0].Name, ConfigIdentityDetectorPeopleDictPath)
44+
assert.Equal(t, opts[1].Name, ConfigIdentityDetectorExactSignatures)
4445
logger := core.NewLogger()
4546
assert.NoError(t, id.Configure(map[string]interface{}{
4647
core.ConfigLogger: logger,
@@ -150,6 +151,28 @@ func TestIdentityDetectorConsume(t *testing.T) {
150151
assert.Equal(t, res[DependencyAuthor].(int), AuthorMissing)
151152
}
152153

154+
func TestIdentityDetectorConsumeExact(t *testing.T) {
155+
commit, _ := test.Repository.CommitObject(plumbing.NewHash(
156+
"5c0e755dd85ac74584d9988cc361eccf02ce1a48"))
157+
deps := map[string]interface{}{}
158+
deps[core.DependencyCommit] = commit
159+
id := fixtureIdentityDetector()
160+
id.ExactSignatures = true
161+
id.PeopleDict = map[string]int{
162+
"vadim markovtsev <gmarkhor@gmail.com>": 0,
163+
"vadim markovtsev <vadim@sourced.tech>": 1,
164+
}
165+
res, err := id.Consume(deps)
166+
assert.Nil(t, err)
167+
assert.Equal(t, res[DependencyAuthor].(int), 1)
168+
commit, _ = test.Repository.CommitObject(plumbing.NewHash(
169+
"8a03b5620b1caa72ec9cb847ea88332621e2950a"))
170+
deps[core.DependencyCommit] = commit
171+
res, err = id.Consume(deps)
172+
assert.Nil(t, err)
173+
assert.Equal(t, res[DependencyAuthor].(int), AuthorMissing)
174+
}
175+
153176
func TestIdentityDetectorLoadPeopleDict(t *testing.T) {
154177
id := fixtureIdentityDetector()
155178
err := id.LoadPeopleDict(path.Join("..", "..", "test_data", "identities"))
@@ -175,22 +198,6 @@ func TestIdentityDetectorLoadPeopleDictWrongPath(t *testing.T) {
175198
assert.NotNil(t, err)
176199
}
177200

178-
/*
179-
// internal compiler error in 1.8
180-
func TestGeneratePeopleDict(t *testing.T) {
181-
id := fixtureIdentityDetector()
182-
commits := make([]*object.Commit, 0)
183-
iter, err := test.Repository.CommitObjects()
184-
for ; err != io.EOF; commit, err := iter.Next() {
185-
if err != nil {
186-
panic(err)
187-
}
188-
commits = append(commits, commit)
189-
}
190-
id.GeneratePeopleDict(commits)
191-
}
192-
*/
193-
194201
func TestIdentityDetectorGeneratePeopleDict(t *testing.T) {
195202
id := fixtureIdentityDetector()
196203
commits := make([]*object.Commit, 0)
@@ -244,6 +251,27 @@ func TestIdentityDetectorGeneratePeopleDict(t *testing.T) {
244251
assert.NotEqual(t, id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName)
245252
}
246253

254+
func TestIdentityDetectorGeneratePeopleDictExact(t *testing.T) {
255+
id := fixtureIdentityDetector()
256+
id.ExactSignatures = true
257+
commits := make([]*object.Commit, 0)
258+
iter, err := test.Repository.CommitObjects()
259+
commit, err := iter.Next()
260+
for ; err != io.EOF; commit, err = iter.Next() {
261+
if err != nil {
262+
panic(err)
263+
}
264+
commits = append(commits, commit)
265+
}
266+
id.GeneratePeopleDict(commits)
267+
ass := assert.New(t)
268+
ass.Equal(len(id.PeopleDict), len(id.ReversedPeopleDict))
269+
ass.True(len(id.ReversedPeopleDict) >= 24)
270+
ass.Contains(id.PeopleDict, "vadim markovtsev <vadim@sourced.tech>")
271+
ass.Contains(id.PeopleDict, "vadim markovtsev <vadim@athenian.co>")
272+
ass.NotEqual(id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName)
273+
}
274+
247275
func TestIdentityDetectorLoadPeopleDictInvalidPath(t *testing.T) {
248276
id := fixtureIdentityDetector()
249277
ipath := "/xxxyyyzzzInvalidPath!hehe"

python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
description="Python companion for github.com/src-d/hercules to visualize the results.",
2323
long_description=long_description,
2424
long_description_content_type="text/markdown",
25-
version="10.7.0",
25+
version="10.7.1",
2626
license="Apache-2.0",
2727
author="source{d}",
2828
author_email="machine-learning@sourced.tech",

0 commit comments

Comments
 (0)