Skip to content

Commit 44eef4b

Browse files
craig[bot]wenyihu6
andcommitted
Merge #155131
155131: asim: add asimdiff r=wenyihu6 a=wenyihu6 This commit adds a Python script that reads a git diff piped on stdin and compares a chosen metric (sum by default) on the lines containing a given keyword, before and after the change. Generated by Claude. Sample usage: ``` git diff | python3 pkg/kv/kvserver/asim/tests/cmd/asimdiff.py thrash_pct git diff head~1 head | python3 pkg/kv/kvserver/asim/tests/cmd/asimdiff.py last --metric mean ``` Sample output: ``` [skewed_cpu_skewed_write.txt] before = 249%, after = 249%, diff = +0% cpu_util#5: thrash_pct: [s1=40%, s2=76%, s3=49%, s4=30%, s5=33%, s6=21%] (sum=249%) ... total diff = -5374% (total before = 498950%, total after = 493576%) percent change = -1.1% avg diff = -22.7%, max diff = +7824%, min diff = -5509% changes: 136 regressions, 99 improvements, 2 unchanged ``` Epic: CRDB-55052 Release note: none Co-authored-by: wenyihu6 <wenyi@cockroachlabs.com>
2 parents cee71c6 + ab76a8c commit 44eef4b

File tree

1 file changed

+163
-0
lines changed

1 file changed

+163
-0
lines changed
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Copyright 2025 The Cockroach Authors.
2+
#
3+
# Use of this software is governed by the CockroachDB Software License
4+
# included in the /LICENSE file.
5+
6+
#!/usr/bin/env python3
7+
import re
8+
import sys
9+
import argparse
10+
11+
def extract_value(line, metric):
    """Extract the numeric value of a metric from a single line of text.

    Searches ``line`` for the first well-formed ``<metric>=<number>``
    occurrence. Anything following the number (such as a ``%`` sign) is
    ignored.

    Args:
        line: Text to search, e.g. ``"thrash_pct: [...] (sum=249%)"``.
        metric: One of ``'sum'``, ``'mean'`` or ``'stddev'``.

    Returns:
        An ``int`` for a whole-number ``sum``, a ``float`` for ``mean``,
        ``stddev`` or a ``sum`` written with a decimal point, and ``None``
        when ``metric`` is not supported or no number follows
        ``<metric>=`` anywhere in ``line``.
    """
    if metric not in ('sum', 'mean', 'stddev'):
        return None

    # Accept only well-formed decimal numbers ("12", "12.5", ".5"). A loose
    # character class such as [\d.]+ would also capture "." or "1.2.3",
    # which float() rejects with ValueError.
    match = re.search(metric + r'=(\d+(?:\.\d+)?|\.\d+)', line)
    if match is None:
        return None

    text = match.group(1)
    # Whole-number sums stay ints; a decimal sum is kept as a float rather
    # than being truncated to its integer part.
    if metric == 'sum' and '.' not in text:
        return int(text)
    return float(text)
28+
29+
def process_diff(diff_content, keyword, metric):
    """Collect metric values from the changed lines of a unified git diff.

    Only removed (``-``) and added (``+``) lines that contain ``keyword``
    and carry a parsable ``metric`` value are recorded. File header lines
    (``---`` / ``+++``) are never treated as changes.

    Args:
        diff_content: Full text of the diff.
        keyword: Substring a changed line must contain to be considered.
        metric: Metric name handed to ``extract_value``.

    Returns:
        A tuple ``(results, total_before, total_after)``. ``results`` is a
        list of dicts with keys ``'type'`` (``'before'`` for removed lines,
        ``'after'`` for added lines), ``'line'`` (the line without its diff
        marker, stripped), ``'value'`` and ``'file'`` (path of the file the
        line belongs to, or ``None`` if unknown). The totals are the sums of
        the removed and added values respectively.
    """
    results = []
    total_before = 0
    total_after = 0
    current_file = None

    for line in diff_content.split('\n'):
        if line.startswith('diff --git'):
            # Every file section starts here. Take the name from this header
            # so that a deleted file (whose "+++" line is "/dev/null") is not
            # attributed to whichever file preceded it in the diff.
            header = re.match(r'diff --git a/.+ b/(.+)', line)
            current_file = header.group(1).rstrip() if header else None
        elif line.startswith('+++'):
            # "+++ b/path/to/file" names the post-change file; prefer it
            # when present. rstrip() drops a trailing tab or carriage return.
            header = re.search(r'\+\+\+ b/(.+)', line)
            if header:
                current_file = header.group(1).rstrip()

        if keyword not in line:
            continue

        if line.startswith('-') and not line.startswith('---'):
            change_type = 'before'
        elif line.startswith('+') and not line.startswith('+++'):
            change_type = 'after'
        else:
            # Context lines and file headers carry no before/after value.
            continue

        value = extract_value(line, metric)
        if value is None:
            continue

        if change_type == 'before':
            total_before += value
        else:
            total_after += value
        results.append({
            'type': change_type,
            'line': line[1:].strip(),
            'value': value,
            'file': current_file
        })

    return results, total_before, total_after
73+
74+
def print_results(results, total_before, total_after, metric):
    """Print per-line before/after comparisons followed by summary statistics.

    Removed ("before") and added ("after") records are paired in order
    within each file, so a file that only gained or only lost lines cannot
    shift the pairing of the files that follow it. A record without a
    partner is compared against 0. Entries are printed from the largest
    absolute difference to the smallest.

    Args:
        results: Records as produced by ``process_diff``: dicts with keys
            ``'type'`` (``'before'`` or ``'after'``), ``'line'``,
            ``'value'`` and ``'file'``.
        total_before: Sum of all "before" values.
        total_after: Sum of all "after" values.
        metric: ``'sum'`` formats values without decimals; any other
            metric formats them with two decimals.
    """
    # Group the records by file, keeping the order of first appearance.
    by_file = {}
    for record in results:
        sides = by_file.setdefault(record['file'], {'before': [], 'after': []})
        if record['type'] in sides:
            sides[record['type']].append(record)

    # Pair up changes within each file and calculate diffs.
    entries = []
    for path, sides in by_file.items():
        befores = sides['before']
        afters = sides['after']
        filename = path.split('/')[-1] if path else 'unknown'
        for i in range(max(len(befores), len(afters))):
            before = befores[i] if i < len(befores) else None
            after = afters[i] if i < len(afters) else None
            before_val = before['value'] if before is not None else 0
            after_val = after['value'] if after is not None else 0
            # Show the new line when there is one, otherwise the removed
            # line, so a removed-only entry still says what it refers to.
            shown = after if after is not None else before
            entries.append({
                'filename': filename,
                'before': before_val,
                'after': after_val,
                'diff': after_val - before_val,
                'line': shown['line']
            })

    # Sort by absolute value of diff (largest to smallest).
    entries.sort(key=lambda entry: abs(entry['diff']), reverse=True)

    # Sums are shown as whole numbers, mean/stddev with two decimals.
    if metric == 'sum':
        fmt = lambda x: f"{x:.0f}"
    else:
        fmt = lambda x: f"{x:.2f}"

    diffs = []
    for entry in entries:
        diffs.append(entry['diff'])
        # A zero baseline has no meaningful relative change; report 0.
        pct_diff = (entry['diff'] / entry['before'] * 100) if entry['before'] != 0 else 0
        print(f"[{entry['filename']}] before = {fmt(entry['before'])}, after = {fmt(entry['after'])}, diff = {entry['diff']:+.2f} ({pct_diff:+.1f}%)")
        print(f" {entry['line']}")

    # Summary statistics over all paired entries.
    total_diff = total_after - total_before
    avg_diff = sum(diffs) / len(diffs) if diffs else 0
    max_diff = max(diffs) if diffs else 0
    min_diff = min(diffs) if diffs else 0
    pct_change = (total_diff / total_before * 100) if total_before > 0 else 0

    # A positive diff (metric went up) is counted as a regression.
    regressions = sum(1 for d in diffs if d > 0)
    improvements = sum(1 for d in diffs if d < 0)
    unchanged = sum(1 for d in diffs if d == 0)

    print(f"total diff = {total_diff:+.2f} (total before = {fmt(total_before)}, total after = {fmt(total_after)})")
    print(f"percent change = {pct_change:+.1f}%")
    print(f"avg diff = {avg_diff:+.2f}, max diff = {max_diff:+.2f}, min diff = {min_diff:+.2f}")
    print(f"changes: {regressions} regressions, {improvements} improvements, {unchanged} unchanged")
137+
138+
def _read_diff(path):
    """Return the diff text from ``path``, or from stdin when no path is given."""
    if not path:
        return sys.stdin.read()
    with open(path, 'r') as handle:
        return handle.read()


def main():
    """Parse the command line, analyze the diff and print the report.

    Exits with status 1 when the diff has no changed line containing the
    keyword with a value for the requested metric.
    """
    cli = argparse.ArgumentParser(
        description='Analyze git diff for lines containing a keyword with metric values',
        epilog='Example: git diff | python %(prog)s thrash_pct --metric sum',
    )
    cli.add_argument(
        'keyword',
        help='Keyword to search for in diff lines (e.g., thrash_pct)',
    )
    cli.add_argument(
        '--metric', '-m',
        choices=['sum', 'mean', 'stddev'],
        default='sum',
        help='Metric to extract and compare (default: sum)',
    )
    cli.add_argument(
        '--file', '-f',
        help='Diff file to read (if not provided, reads from stdin)',
    )
    options = cli.parse_args()

    found, before_total, after_total = process_diff(
        _read_diff(options.file), options.keyword, options.metric
    )

    # Nothing matched: report it and signal failure to the caller.
    if not found:
        print(f"No lines containing '{options.keyword}' with {options.metric} values found in the diff!")
        sys.exit(1)

    print_results(found, before_total, after_total, options.metric)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)