Skip to content

Commit c5806e4

Browse files
authored
[CI] Track line diff per LLVM commit (#591)
This change adds new data points to track how many lines were modified per commit, captured under a new `diff` field. This field replaces `files_modified` in our BigQuery table, as it serves the same purpose but now contains additional data regarding each file.
1 parent 02f18da commit c5806e4

File tree

2 files changed

+40
-8
lines changed

2 files changed

+40
-8
lines changed

premerge/bigquery_schema/llvm_commits_table_schema.json

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,31 @@
4848
"description": "List of GitHub users who reviewed the pull request for this commit"
4949
},
5050
{
51-
"name": "files_modified",
52-
"type": "STRING",
51+
"name": "diff",
52+
"type": "RECORD",
5353
"mode": "REPEATED",
54-
"description": "List of filepaths modified by this commit"
54+
"description": "List of files and line addition/deletion counts for this commit",
55+
"fields": [
56+
{
57+
"name": "file",
58+
"type": "STRING",
59+
"mode": "NULLABLE"
60+
},
61+
{
62+
"name": "additions",
63+
"type": "INTEGER",
64+
"mode": "NULLABLE"
65+
},
66+
{
67+
"name": "deletions",
68+
"type": "INTEGER",
69+
"mode": "NULLABLE"
70+
},
71+
{
72+
"name": "total",
73+
"type": "INTEGER",
74+
"mode": "NULLABLE"
75+
}
76+
]
5577
}
5678
]

premerge/ops-container/process_llvm_commits.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@
1919

2020
# Number of days to look back for new commits
2121
# We allow some buffer time between when a commit is made and when it is queried
22-
# for reviews. This is allow time for any events to propogate in the GitHub
23-
# Archive BigQuery tables.
22+
# for reviews. This is to allow time for any new GitHub events to propagate.
2423
LOOKBACK_DAYS = 2
2524

2625
# Template GraphQL subquery to check if a commit has an associated pull request
@@ -57,7 +56,7 @@
5756
class LLVMCommitInfo:
5857
commit_sha: str
5958
commit_timestamp_seconds: int
60-
files_modified: set[str]
59+
diff: list[dict[str, int | str]]
6160
commit_author: str = "" # GitHub username of author is unknown until API call
6261
has_pull_request: bool = False
6362
pull_request_number: int = 0
@@ -117,7 +116,15 @@ def query_for_reviews(
117116
commit.hexsha: LLVMCommitInfo(
118117
commit_sha=commit.hexsha,
119118
commit_timestamp_seconds=commit.committed_date,
120-
files_modified=set(commit.stats.files.keys()),
119+
diff=[
120+
{
121+
"file": file,
122+
"additions": line_stats["insertions"],
123+
"deletions": line_stats["deletions"],
124+
"total": line_stats["lines"],
125+
}
126+
for file, line_stats in commit.stats.files.items()
127+
],
121128
)
122129
for commit in new_commits
123130
}
@@ -210,7 +217,10 @@ def upload_daily_metrics_to_bigquery(
210217
)
211218
table = bq_client.get_table(table_ref)
212219
commit_records = [dataclasses.asdict(commit) for commit in new_commits]
213-
bq_client.insert_rows(table, commit_records)
220+
errors = bq_client.insert_rows(table, commit_records)
221+
if errors:
222+
logging.error("Failed to upload commit info to BigQuery: %s", errors)
223+
exit(1)
214224

215225

216226
def main() -> None:

0 commit comments

Comments
 (0)