Skip to content

Commit 9e4144e

Browse files
committed
🤖 fix: update tb.lock before resume to apply concurrency changes immediately
Addresses Codex feedback. Previously, concurrency adjustments were written to tb.lock AFTER a burst completed, but the next resume command read tb.lock at the START of the burst. This created a one-burst delay during which the old concurrency was still used even after an adjustment.

The fix updates tb.lock BEFORE calling 'terminal-bench runs resume', so the new concurrency takes effect immediately. This is critical when the system is overloaded: concurrency must be reduced on the very next burst, not one burst later.

Flow before fix:
  Burst N completes → adjust_concurrency() → write tb.lock
  Burst N+1 starts → resume reads OLD tb.lock value

Flow after fix:
  adjust_concurrency() completes
  Burst N+1 starts → write tb.lock → resume reads NEW tb.lock value
1 parent fbc0b46 commit 9e4144e

File tree

2 files changed

+17
-19
lines changed

2 files changed

+17
-19
lines changed

benchmarks/terminal_bench/adaptive_bench.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@
1919
class AdaptiveBench:
2020
"""
2121
Adaptive concurrency wrapper for terminal-bench.
22-
22+
2323
Concurrency is automatically bounded to [1, 16] for optimal performance
2424
across different hardware configurations.
2525
"""
26-
26+
2727
MIN_CONCURRENT = 1
2828
MAX_CONCURRENT = 16
29-
29+
3030
def __init__(
3131
self,
3232
load_threshold: float,
@@ -90,8 +90,12 @@ def adjust_concurrency(self) -> bool:
9090
self.current_concurrent = min(
9191
self.current_concurrent * 2, self.MAX_CONCURRENT
9292
)
93-
elif load > self.load_threshold and self.current_concurrent > self.MIN_CONCURRENT:
94-
self.current_concurrent = max(self.current_concurrent // 2, self.MIN_CONCURRENT)
93+
elif (
94+
load > self.load_threshold and self.current_concurrent > self.MIN_CONCURRENT
95+
):
96+
self.current_concurrent = max(
97+
self.current_concurrent // 2, self.MIN_CONCURRENT
98+
)
9599

96100
if self.current_concurrent != old_concurrent:
97101
print(
@@ -124,7 +128,11 @@ def run_burst(self) -> int:
124128
f"concurrency={self.current_concurrent}"
125129
)
126130
else:
127-
# Subsequent bursts - resume existing run
131+
# Subsequent bursts - update tb.lock BEFORE resume
132+
# This ensures the resume command picks up the new concurrency
133+
self._update_lock_concurrency()
134+
135+
# Resume existing run
128136
cmd = [
129137
"uvx",
130138
"terminal-bench",
@@ -166,10 +174,6 @@ def run_burst(self) -> int:
166174

167175
print(f"⏱️ Burst #{self.burst_count} completed in {burst_duration:.1f}s")
168176

169-
# Update n_concurrent in tb.lock for next resume
170-
if self.run_id and result.returncode == 0:
171-
self._update_lock_concurrency()
172-
173177
return result.returncode
174178

175179
def _update_lock_concurrency(self):
@@ -184,9 +188,7 @@ def _update_lock_concurrency(self):
184188

185189
# Update concurrency in lock file
186190
if "run_config" in lock_data:
187-
lock_data["run_config"][
188-
"n_concurrent_trials"
189-
] = self.current_concurrent
191+
lock_data["run_config"]["n_concurrent_trials"] = self.current_concurrent
190192

191193
with open(lock_path, "w") as f:
192194
json.dump(lock_data, f, indent=2)

benchmarks/terminal_bench/adaptive_bench_test.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,7 @@ def test_get_run_status_with_results(self, mock_exists, mock_file):
166166

167167
# Mock tb.lock with 5 tasks
168168
tb_lock_data = {
169-
"dataset": {
170-
"task_ids": ["task1", "task2", "task3", "task4", "task5"]
171-
}
169+
"dataset": {"task_ids": ["task1", "task2", "task3", "task4", "task5"]}
172170
}
173171

174172
# Mock results.json with 3 completed tasks
@@ -250,9 +248,7 @@ def test_update_lock_concurrency(self, mock_exists, mock_file):
250248

251249
mock_exists.return_value = True
252250

253-
lock_data = {
254-
"run_config": {"n_concurrent_trials": 1, "other_field": "value"}
255-
}
251+
lock_data = {"run_config": {"n_concurrent_trials": 1, "other_field": "value"}}
256252

257253
# Setup mock to return lock_data on read
258254
mock_file.return_value.read.return_value = json.dumps(lock_data)

0 commit comments

Comments
 (0)