Skip to content

Commit 5bd3e68

Browse files
committed
Add histogram plots for pairwise changed git files
1 parent e992d85 commit 5bd3e68

7 files changed

+334
-26
lines changed

cypher/General_Enrichment/Add_file_name and_extension.cypher

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
// Add "name", "extension" and "extensionExtended" properties to File nodes
1+
// Add "name", "extension" and "extensionExtended" properties to File nodes. Supports Git:File nodes with "relativePath" property.
22

33
MATCH (file:File)
4-
WHERE file.fileName IS NOT NULL
4+
WHERE (file.fileName IS NOT NULL OR file.relativePath IS NOT NULL)
55
AND file.name IS NULL // Don't override an already existing "name" property
66
WITH *
7-
,file.fileName AS fileName
7+
,coalesce(file.fileName, file.relativePath) AS fileName
88
WITH *
99
,last(split(fileName, '/')) AS fileNameWithoutPath
1010
WITH *

cypher/GitLog/Add_CHANGED_TOGETHER_WITH_relationships_to_git_files.cypher

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@ UNWIND fileCombinations AS fileCombination
3232
,count(DISTINCT commitHash) AS updateCommitCount
3333
,collect(DISTINCT commitHash) AS updateCommitHashes
3434
// Minimum co-change threshold:
35-
// Filter out file pairs that where changed not very often together
36-
// In detail: More than 0.1 per mille compared to overall commit count
37-
// WHERE updateCommitCount > globalUpdateCommitCount * 0.001
35+
// Filter out file pairs that were rarely changed together (keep only pairs that share more than 2 commits)
36+
WHERE updateCommitCount > 2
3837
WITH *
3938
,fileCombination[0] AS firstFile
4039
,fileCombination[1] AS secondFile
@@ -65,7 +64,7 @@ UNWIND fileCombinations AS fileCombination
6564
// Create the relationship "CHANGED_TOGETHER_WITH" if it doesn't exist yet and set "updateCommitCount" and the other co-change properties on it
6665
CALL (firstFile, secondFile, updateCommitCount, updateCommitHashes, updateCommitMinConfidence, updateCommitSupport, updateCommitLift, updateCommitJaccardSimilarity) {
6766
MERGE (firstFile)-[pairwiseChange:CHANGED_TOGETHER_WITH]-(secondFile)
68-
SET pairwiseChange.updateCommitCount = updateCommitCount
67+
SET pairwiseChange.updateCommitCount = toInteger(updateCommitCount)
6968
,pairwiseChange.updateCommitHashes = updateCommitHashes
7069
,pairwiseChange.updateCommitMinConfidence = updateCommitMinConfidence
7170
,pairwiseChange.updateCommitSupport = updateCommitSupport

cypher/GitLog/List_git_files_that_were_changed_together_with_another_file.cypher

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
MATCH (firstGitFile:Git&File&!Repository)-[gitChange:CHANGED_TOGETHER_WITH]-(secondGitFile:Git&File&!Repository)
44
MATCH (gitRepository:Git&Repository)-[:HAS_FILE]->(firstGitFile)
5-
UNWIND gitChange.commitHashes AS commitHash
5+
UNWIND gitChange.updateCommitHashes AS commitHash
66
WITH gitRepository.name + '/' + firstGitFile.relativePath AS filePath
77
,count(DISTINCT commitHash) AS commitCount
8-
,sum(firstGitFile.updateCommitCount) AS fileUpdateCount
8+
,sum(firstGitFile.updateCommitCount) AS fileUpdateCount
99
WITH *
1010
// Out of all the times the file was touched, how often did it co-occur with other files?
1111
,CASE WHEN fileUpdateCount > 0 THEN toFloat(commitCount) / fileUpdateCount ELSE 0.0 END AS coChangeRate
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// List pairs of files that were changed together. Requires Add_CHANGED_TOGETHER_WITH_relationships_to_git_files.cypher to run first.
2+
3+
MATCH (firstFile:Git:File)-[pairwiseChange:CHANGED_TOGETHER_WITH]-(secondFile:Git:File)
4+
WHERE elementId(firstFile) < elementId(secondFile)
5+
WITH *
6+
,coalesce(firstFile.relativePath, firstFile.fileName) AS firstFileName
7+
,coalesce(secondFile.relativePath, secondFile.fileName) AS secondFileName
8+
RETURN firstFileName
9+
,secondFileName
10+
,firstFile.name + '<br>' + secondFile.name AS filePairLineBreak
11+
,firstFileName + '<br>' + secondFileName AS filePairWithRelativePathLineBreak
12+
,firstFile.name + '↔' + secondFile.name AS filePair
13+
,firstFileName + '↔' + secondFileName AS filePairWithRelativePath
14+
,firstFile.extension AS firstFileExtension
15+
,secondFile.extension AS secondFileExtension
16+
,firstFile.extension + '↔' + secondFile.extension AS fileExtensionPair
17+
,toInteger(pairwiseChange.updateCommitCount) AS updateCommitCount
18+
,pairwiseChange.updateCommitMinConfidence AS updateCommitMinConfidence
19+
,pairwiseChange.updateCommitSupport AS updateCommitSupport
20+
,pairwiseChange.updateCommitLift AS updateCommitLift
21+
,pairwiseChange.updateCommitJaccardSimilarity AS updateCommitJaccardSimilarity

cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// List pair of files that were changed together and that have a declared dependency between each other.
1+
// List pairs of files that were changed together and that have a declared dependency between each other. Requires Add_CHANGED_TOGETHER_WITH_relationships_to_git_files.cypher and Add_CHANGED_TOGETHER_WITH_relationships_to_code_files.cypher to run first.
22

33
MATCH (firstCodeFile:File)-[dependency:DEPENDS_ON]->(secondCodeFile:File)
44
MATCH (firstCodeFile)-[pairwiseChange:CHANGED_TOGETHER_WITH]-(secondCodeFile)

0 commit comments

Comments
 (0)