Skip to content

Commit f4b9976

Browse files
committed
Add git history commit and author statistics
1 parent 2f8664e commit f4b9976

File tree

3 files changed

+173
-0
lines changed

3 files changed

+173
-0
lines changed
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
// List git file directories and their statistics
//
// Every git file that is not marked as deleted contributes to each of its ancestor
// directories, starting with the repository name as the top-most directory.
// Statistics are aggregated in three steps: per file and author, per directory and
// author, and finally per directory. Uses apoc.text.join to assemble directory paths.

MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file:Git&File&!Repository)
WHERE git_file.deletedAt IS NULL // filter out deleted files
ORDER BY git_file.relativePath
 WITH *
     ,datetime.fromEpochMillis(git_file.createdAtEpoch) AS fileCreatedAtTimestamp
     // Falls back to the creation time when no modification time is recorded
     ,datetime.fromEpochMillis(coalesce(git_file.lastModificationAtEpoch, git_file.createdAtEpoch)) AS fileLastModificationAtTimestamp
 // Prefix the path with the repository name so that it becomes the first path element
 WITH *, git_repository.name + '/' + git_file.relativePath AS filePath
 WITH *, split(filePath, '/') AS pathElements
// Commits are matched through changes on the file itself or on a file that reaches it
// via up to three HAS_NEW_NAME relationships (presumably earlier names of the same file).
MATCH (git_commit:Git&Commit)-[:CONTAINS_CHANGE]->(git_change:Git&Change)-->(old_files_included:Git&File&!Repository)-[:HAS_NEW_NAME*0..3]->(git_file)
// Step 1: statistics per file and author
 WITH pathElements
     ,fileCreatedAtTimestamp
     ,fileLastModificationAtTimestamp
     ,filePath                                           AS fileRelativePath
     ,split(git_commit.author, ' <')[0]                  AS author
     ,split(split(git_commit.author, ' <')[1], '>')[0]   AS authorEmail
     ,git_commit.author                                  AS authorFull
     ,max(git_commit.sha)                                AS maxCommitSha
     ,COUNT(DISTINCT git_commit.sha)                     AS commitCount
     ,date(max(git_commit.date))                         AS lastCommitDate
ORDER BY filePath ASCENDING, commitCount DESCENDING
// One row per ancestor directory of the file. Positions 0 .. size-2 address the
// directories; the last path element is the file name and is left out.
// Directories are built by position instead of by searching for the directory name
// inside the path: a name search cuts at the first textual match and therefore
// misplaces directories whose name repeats or is a prefix of an earlier path element
// (e.g. "repo/ab/a/file" would report "repo/a"), and a name comparison against the
// file name would drop a directory that is named like the file it contains.
// coalesce keeps the range empty (no rows) when the path could not be determined.
UNWIND range(0, coalesce(size(pathElements), 0) - 2) AS directoryDepth
 WITH *
     ,apoc.text.join(pathElements[0..directoryDepth], '/')       AS parent    // '' for the top-most directory
     ,apoc.text.join(pathElements[0..(directoryDepth + 1)], '/') AS directory
// Step 2: statistics per directory and author
 WITH directory                                    AS directoryPath
     ,split(directory, '/')[-1]                    AS directoryName
     ,parent                                       AS directoryParentPath
     ,split(parent, '/')[-1]                       AS directoryParentName
     ,size(split(directory, '/'))                  AS directoryPathLength
     ,author
     ,authorEmail
     ,authorFull
     ,count(DISTINCT fileRelativePath)             AS fileCount
     ,max(date(fileCreatedAtTimestamp)         )   AS lastCreationDate
     ,max(date(fileLastModificationAtTimestamp))   AS lastModificationDate
     ,sum(commitCount)                             AS commitCount
     ,max(maxCommitSha)                            AS maxCommitSha
     ,max(lastCommitDate)                          AS lastCommitDate
     ,max(fileRelativePath)                        AS maxFileRelativePath
     ,duration.inDays(max(lastCommitDate), date()).days                         AS daysSinceLastCommit
     ,duration.inDays(max(fileCreatedAtTimestamp), datetime()).days             AS daysSinceLastCreation
     ,duration.inDays(max(fileLastModificationAtTimestamp), datetime()).days    AS daysSinceLastModification
// Ensure that the authors are ordered by their commit count descending per directory.
// The collect(...)[0..2] picks below rely on this row order.
ORDER BY directoryPath ASCENDING, commitCount DESCENDING
// Step 3: statistics per directory with the three authors that have the most commits
 WITH directoryPath
     ,directoryName
     ,directoryParentPath
     ,directoryParentName
     ,directoryPathLength
     ,collect(author)[0]                   AS mainAuthor
     ,collect(authorEmail)[0]              AS mainAuthorEmail
     ,collect(authorFull)[0]               AS mainAuthorFull
     ,collect(author)[1]                   AS secondAuthor
     ,collect(authorEmail)[1]              AS secondAuthorEmail
     ,collect(authorFull)[1]               AS secondAuthorFull
     ,collect(author)[2]                   AS thirdAuthor
     ,collect(authorEmail)[2]              AS thirdAuthorEmail
     ,collect(authorFull)[2]               AS thirdAuthorFull
     ,count(DISTINCT authorFull)           AS authorCount
     ,sum(fileCount)                       AS fileCount
     ,sum(commitCount)                     AS commitCount
     ,max(lastCreationDate)                AS lastCreationDate
     ,max(lastModificationDate)            AS lastModificationDate
     ,max(maxCommitSha)                    AS maxCommitSha
     ,max(lastCommitDate)                  AS lastCommitDate
     ,min(daysSinceLastCommit)             AS daysSinceLastCommit
     ,min(daysSinceLastCreation)           AS daysSinceLastCreation
     ,min(daysSinceLastModification)       AS daysSinceLastModification
     ,max(maxFileRelativePath)             AS maxFileRelativePath
// The final results are grouped by the statistic values like file count,...
// Directories that share all of these values end up in one row: their names are joined
// with '/', the path is taken from the last collected directory and the parent from the
// first one. NOTE(review): this depends on the incoming row order - confirm it is still
// ascending by directoryPath after the aggregation above.
RETURN collect(directoryPath)[-1]                       AS directoryPath
      ,apoc.text.join(collect(directoryName), '/')      AS directoryName
      ,collect(directoryParentPath)[0]                  AS directoryParentPath
      ,collect(directoryParentName)[0]                  AS directoryParentName
      ,mainAuthor
      ,mainAuthorEmail
      ,mainAuthorFull
      ,secondAuthor
      ,secondAuthorEmail
      ,secondAuthorFull
      ,thirdAuthor
      ,thirdAuthorEmail
      ,thirdAuthorFull
      ,authorCount
      ,fileCount
      ,commitCount
      ,lastCreationDate
      ,lastModificationDate
      ,lastCommitDate
      ,daysSinceLastCommit
      ,daysSinceLastCreation
      ,daysSinceLastModification
      ,maxCommitSha
      ,maxFileRelativePath
      ,max(directoryPathLength)                         AS directoryPathLength
      ,count(DISTINCT directoryPath)                    AS combinedDirectoriesCount
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
// List git file directories and their commit statistics by author sorted by the number of commits descending
//
// Every git file that is not marked as deleted contributes to each of its ancestor
// directories, starting with the repository name as the top-most directory.
// Statistics are aggregated per file and author first and then per directory and author.
// Uses apoc.text.join to assemble directory paths.

MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file:Git&File&!Repository)
WHERE git_file.deletedAt IS NULL // filter out deleted files
ORDER BY git_file.relativePath
 WITH *
     ,datetime.fromEpochMillis(git_file.createdAtEpoch) AS fileCreatedAtTimestamp
     // Falls back to the creation time when no modification time is recorded
     ,datetime.fromEpochMillis(coalesce(git_file.lastModificationAtEpoch, git_file.createdAtEpoch)) AS fileLastModificationAtTimestamp
 // Prefix the path with the repository name so that it becomes the first path element
 WITH *, git_repository.name + '/' + git_file.relativePath AS filePath
 WITH *, split(filePath, '/') AS pathElements
// Commits are matched through changes on the file itself or on a file that reaches it
// via up to three HAS_NEW_NAME relationships (presumably earlier names of the same file).
MATCH (git_commit:Git&Commit)-[:CONTAINS_CHANGE]->(git_change:Git&Change)-->(old_files_included:Git&File&!Repository)-[:HAS_NEW_NAME*0..3]->(git_file)
// Step 1: statistics per file and author
 WITH pathElements
     ,fileCreatedAtTimestamp
     ,fileLastModificationAtTimestamp
     ,filePath                                           AS fileRelativePath
     ,split(git_commit.author, ' <')[0]                  AS author
     ,split(split(git_commit.author, ' <')[1], '>')[0]   AS authorEmail
     ,git_commit.author                                  AS authorFull
     ,max(git_commit.sha)                                AS maxCommitSha
     ,COUNT(DISTINCT git_commit.sha)                     AS commitCount
     ,date(max(git_commit.date))                         AS lastCommitDate
// One row per ancestor directory of the file. Positions 0 .. size-2 address the
// directories; the last path element is the file name and is left out.
// Directories are built by position instead of by searching for the directory name
// inside the path: a name search cuts at the first textual match and therefore
// misplaces directories whose name repeats or is a prefix of an earlier path element
// (e.g. "repo/ab/a/file" would report "repo/a"), and a name comparison against the
// file name would drop a directory that is named like the file it contains.
// coalesce keeps the range empty (no rows) when the path could not be determined.
UNWIND range(0, coalesce(size(pathElements), 0) - 2) AS directoryDepth
 WITH *
     ,apoc.text.join(pathElements[0..directoryDepth], '/')       AS parent    // '' for the top-most directory
     ,apoc.text.join(pathElements[0..(directoryDepth + 1)], '/') AS directory
// Step 2: statistics per directory and author
 WITH directory                                    AS directoryPath
     ,split(directory, '/')[-1]                    AS directoryName
     ,parent                                       AS directoryParentPath
     ,split(parent, '/')[-1]                       AS directoryParentName
     ,size(split(directory, '/'))                  AS directoryPathLength
     ,author
     ,authorEmail
     ,authorFull
     ,count(DISTINCT fileRelativePath)             AS fileCount
     ,max(date(fileCreatedAtTimestamp)         )   AS lastCreationDate
     ,max(date(fileLastModificationAtTimestamp))   AS lastModificationDate
     ,sum(commitCount)                             AS commitCount
     ,max(maxCommitSha)                            AS maxCommitSha
     ,max(lastCommitDate)                          AS lastCommitDate
     ,max(fileRelativePath)                        AS maxFileRelativePath
     ,duration.inDays(max(lastCommitDate), date()).days                         AS daysSinceLastCommit
     ,duration.inDays(max(fileCreatedAtTimestamp), datetime()).days             AS daysSinceLastCreation
     ,duration.inDays(max(fileLastModificationAtTimestamp), datetime()).days    AS daysSinceLastModification
// Ensure that the authors are ordered by their commit count descending per directory
ORDER BY directoryPath ASCENDING, commitCount DESCENDING
// The final results are grouped by the statistic values like file count,...
// Directories that share the same author and all of these values end up in one row:
// their names are joined with '/', the path is taken from the last collected directory
// and the parent from the first one (both rely on the row order established above).
RETURN collect(directoryPath)[-1]                       AS directoryPath
      ,apoc.text.join(collect(directoryName), '/')      AS directoryName
      ,collect(directoryParentPath)[0]                  AS directoryParentPath
      ,collect(directoryParentName)[0]                  AS directoryParentName
      ,author
      ,authorEmail
      ,authorFull
      ,fileCount
      ,commitCount
      ,lastCreationDate
      ,lastModificationDate
      ,lastCommitDate
      ,daysSinceLastCommit
      ,daysSinceLastCreation
      ,daysSinceLastModification
      ,maxCommitSha
      ,maxFileRelativePath
      ,max(directoryPathLength)                         AS directoryPathLength
      ,count(DISTINCT directoryPath)                    AS combinedDirectoriesCount

scripts/reports/GitHistoryCsv.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ echo "GitHistoryCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing git history..."
4242
# Detailed git file statistics
4343
execute_cypher "${GIT_LOG_CYPHER_DIR}/List_git_files_with_commit_statistics_by_author.cypher" > "${FULL_REPORT_DIRECTORY}/List_git_files_with_commit_statistics_by_author.csv"
4444
execute_cypher "${GIT_LOG_CYPHER_DIR}/List_git_files_that_were_changed_together_with_another_file.cypher" > "${FULL_REPORT_DIRECTORY}/List_git_files_that_were_changed_together_with_another_file.csv"
45+
# Directory-level git statistics (per author and overall), each written to a CSV file named after its query
execute_cypher "${GIT_LOG_CYPHER_DIR}/List_git_file_directories_with_commit_statistics_by_author.cypher" > "${FULL_REPORT_DIRECTORY}/List_git_file_directories_with_commit_statistics_by_author.csv"
46+
execute_cypher "${GIT_LOG_CYPHER_DIR}/List_git_file_directories_with_commit_statistics.cypher" > "${FULL_REPORT_DIRECTORY}/List_git_file_directories_with_commit_statistics.csv"
4547

4648
# Overall distribution of how many files were changed with one git commit, how many were changed with two, etc.
4749
execute_cypher "${GIT_LOG_CYPHER_DIR}/List_git_files_per_commit_distribution.cypher" > "${FULL_REPORT_DIRECTORY}/List_git_files_per_commit_distribution.csv"

0 commit comments

Comments
 (0)