diff --git a/.github/workflows/terminal-bench.yml b/.github/workflows/terminal-bench.yml
index 50cb87418..981cceedc 100644
--- a/.github/workflows/terminal-bench.yml
+++ b/.github/workflows/terminal-bench.yml
@@ -22,10 +22,10 @@ on:
         type: string
         default: '4'
       livestream:
-        description: 'Enable livestream mode (verbose output to console)'
+        description: 'Enable livestream mode'
         required: false
         type: boolean
-        default: false
+        default: true
       sample_size:
         description: 'Number of random tasks to run (empty = all tasks)'
         required: false
@@ -46,15 +46,10 @@ on:
         required: false
         default: 'terminal-bench-core==0.1.1'
         type: string
-      concurrency:
-        description: 'Number of concurrent tasks (--n-concurrent)'
-        required: false
-        default: '4'
-        type: string
       livestream:
-        description: 'Enable livestream mode (verbose output to console)'
+        description: 'Enable livestream mode'
         required: false
-        default: false
+        default: true
         type: boolean
       sample_size:
         description: 'Number of random tasks to run (empty = all tasks)'
@@ -72,15 +67,24 @@ on:
         description: 'Additional arguments to pass to terminal-bench'
         required: false
         type: string
+      load_threshold:
+        description: 'Load average threshold for adaptive concurrency (default: 1.0)'
+        required: false
+        default: '1.0'
+        type: string
+      check_interval:
+        description: 'Seconds between bursts for adaptive concurrency (default: 60)'
+        required: false
+        default: '60'
+        type: string

 jobs:
   benchmark:
     name: Run Terminal-Bench${{ inputs.model_name && format(' ({0})', inputs.model_name) || '' }}
     runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }}
-    # Full suite (~80 tasks) at concurrency=4 takes ~60-90 minutes typically
-    # Set 4-hour timeout to handle occasional API slowdowns while preventing infinite hangs
-    # If consistently hitting this timeout, investigate task-level issues
-    timeout-minutes: 240
+    # Full suite (~80 tasks) at concurrency=4 takes ~60-90 minutes
+    # Allow 3 hours for safety margin and slower tasks
+    timeout-minutes: 180
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -101,11 +105,12 @@ jobs:
       - name: Build dist/ (skip icons - not needed for benchmark)
         run: make build-main build-preload

-      - name: Run Terminal-Bench
-        run: make benchmark-terminal 2>&1 | tee benchmark.log
+      - name: Run Terminal-Bench (adaptive concurrency 1-16)
+        run: make benchmark-terminal
         env:
           TB_DATASET: ${{ inputs.dataset }}
-          TB_CONCURRENCY: ${{ inputs.concurrency }}
+          TB_LOAD_THRESHOLD: ${{ inputs.load_threshold }}
+          TB_CHECK_INTERVAL: ${{ inputs.check_interval }}
           TB_LIVESTREAM: ${{ inputs.livestream && '1' || '' }}
           TB_SAMPLE_SIZE: ${{ inputs.sample_size }}
           TB_ARGS: ${{ inputs.model_name && format('--agent-kwarg model_name={0} --agent-kwarg thinking_level={1} {2}', inputs.model_name, inputs.thinking_level, inputs.extra_args) || inputs.extra_args }}
@@ -116,12 +121,18 @@ jobs:
         if: always()
         run: |
           echo "=== Terminal-Bench Results Summary ==="
-          if [ -f "$(find runs -name 'results.json' 2>/dev/null | head -1)" ]; then
+          if [ -f "$(find runs -name 'results.json' | head -1)" ]; then
            RESULTS_FILE=$(find runs -name 'results.json' | head -1)
-            cat "$RESULTS_FILE" | jq '{n_resolved, n_unresolved, accuracy}' 2>/dev/null || cat "$RESULTS_FILE"
+            echo "Results file: $RESULTS_FILE"
+            echo ""
+            echo "Full results.json:"
+            cat "$RESULTS_FILE" | jq '.' || cat "$RESULTS_FILE"
+            echo ""
+            echo "Per-task summary:"
+            cat "$RESULTS_FILE" | jq -r '.trials[] | "\(.task_id): \(if .resolved then "✓ PASS" else "✗ FAIL" end)"' 2>/dev/null || echo "Failed to parse task details"
           else
-            echo "❌ No results.json found"
-            ls -laR runs/ 2>/dev/null || echo "runs/ directory missing"
+            echo "No results.json found in runs/"
+            ls -la runs/
           fi

       - name: Set artifact name
@@ -144,7 +155,6 @@ jobs:
           name: ${{ steps.artifact-name.outputs.name }}
           path: |
             runs/
-            benchmark.log
           if-no-files-found: warn
           retention-days: 30

diff --git a/Makefile b/Makefile
index a27559132..52711aaee 100644
--- a/Makefile
+++ b/Makefile
@@ -39,7 +39,7 @@ include fmt.mk
 .PHONY: dist dist-mac dist-win dist-linux
 .PHONY: docs docs-build docs-watch
 .PHONY: storybook storybook-build test-storybook chromatic
-.PHONY: benchmark-terminal
+.PHONY: benchmark-terminal benchmark-terminal-adaptive
 .PHONY: ensure-deps
 .PHONY: check-eager-imports check-bundle-size check-startup

@@ -295,10 +295,14 @@ chromatic: node_modules/.installed ## Run Chromatic for visual regression testin
 	@bun x chromatic --exit-zero-on-changes

 ## Benchmarks
-benchmark-terminal: ## Run Terminal-Bench with the cmux agent (use TB_DATASET/TB_SAMPLE_SIZE/TB_TIMEOUT/TB_ARGS to customize)
+benchmark-terminal: benchmark-terminal-adaptive ## Run Terminal-Bench with adaptive concurrency (alias)
+
+.PHONY: benchmark-terminal-adaptive
+benchmark-terminal-adaptive: ## Run Terminal-Bench with adaptive concurrency (auto-scales 1-16, use TB_LOAD_THRESHOLD/TB_CHECK_INTERVAL)
 	@TB_DATASET=$${TB_DATASET:-terminal-bench-core==0.1.1}; \
 	TB_TIMEOUT=$${TB_TIMEOUT:-1800}; \
-	CONCURRENCY_FLAG=$${TB_CONCURRENCY:+--n-concurrent $$TB_CONCURRENCY}; \
+	TB_LOAD_THRESHOLD=$${TB_LOAD_THRESHOLD:-1.0}; \
+	TB_CHECK_INTERVAL=$${TB_CHECK_INTERVAL:-60}; \
 	LIVESTREAM_FLAG=$${TB_LIVESTREAM:+--livestream}; \
 	TASK_ID_FLAGS=""; \
 	if [ -n "$$TB_SAMPLE_SIZE" ]; then \
@@ -318,14 +322,14 @@ benchmark-terminal: ## Run Terminal-Bench with the cmux agent (use TB_DATASET/TB
 	done; \
 	echo "Selected task IDs: $$TASK_IDS"; \
 	fi; \
-	echo "Using timeout: $$TB_TIMEOUT seconds"; \
-	echo "Running Terminal-Bench with dataset $$TB_DATASET"; \
-	export CMUX_TIMEOUT_MS=$$((TB_TIMEOUT * 1000)); \
-	uvx terminal-bench run \
+	echo "Running adaptive terminal-bench (auto-scaling 1-16, load threshold: $$TB_LOAD_THRESHOLD)"; \
+	python3 benchmarks/terminal_bench/adaptive_bench.py \
+		--load-threshold $$TB_LOAD_THRESHOLD \
+		--check-interval $$TB_CHECK_INTERVAL \
+		-- \
 		--dataset "$$TB_DATASET" \
 		--agent-import-path benchmarks.terminal_bench.cmux_agent:CmuxAgent \
 		--global-agent-timeout-sec $$TB_TIMEOUT \
-		$$CONCURRENCY_FLAG \
 		$$LIVESTREAM_FLAG \
 		$$TASK_ID_FLAGS \
 		$${TB_ARGS}

diff --git a/benchmarks/terminal_bench/README.md b/benchmarks/terminal_bench/README.md
index c106c8804..91f65ab3b 100644
--- a/benchmarks/terminal_bench/README.md
+++ b/benchmarks/terminal_bench/README.md
@@ -4,13 +4,18 @@ This directory contains the cmux agent adapter for [Terminal-Bench](https://gith

 ## Quick Start

+Terminal-bench now runs with **adaptive concurrency by default**, automatically scaling from 1-16 concurrent tasks based on system load.
+
 ```bash
-# Run full benchmark suite (80 tasks, ~2.5 hours)
+# Run full benchmark suite (80 tasks, ~2.5 hours) with adaptive concurrency
 make benchmark-terminal

 # Run with sample of 5 tasks
 TB_SAMPLE_SIZE=5 make benchmark-terminal

+# Adjust load threshold (default: 1.0)
+TB_LOAD_THRESHOLD=2.0 make benchmark-terminal
+
 # Run specific tasks
 make benchmark-terminal TB_ARGS="--task-id hello-world --task-id chess-best-move"
@@ -24,7 +29,8 @@ make benchmark-terminal TB_ARGS="--agent-kwarg model_name=anthropic:claude-opus-

 - `TB_DATASET`: Dataset to use (default: `terminal-bench-core==0.1.1`)
 - `TB_SAMPLE_SIZE`: Number of random tasks to run (default: all 80 tasks)
-- `TB_CONCURRENCY`: Number of concurrent tasks (default: 4)
+- `TB_LOAD_THRESHOLD`: Load average threshold for concurrency adjustments (default: 1.0)
+- `TB_CHECK_INTERVAL`: Seconds between bursts (default: 60)
 - `TB_LIVESTREAM`: Enable livestream mode (set to `1` to enable)
 - `TB_TIMEOUT`: Global timeout in seconds (default: 1800 = 30 minutes)
 - `TB_ARGS`: Additional arguments passed to terminal-bench
@@ -99,6 +105,49 @@ Based on analysis of the Oct 30 nightly run (15-minute timeout):

 **Impact of 30-minute timeout**: Expected to reduce false timeout failures by ~50% and improve pass rates by 10-15 percentage points (from ~42% to ~52-57%).

+## Adaptive Concurrency
+
+Terminal-bench scales automatically from 1 to 16 concurrent tasks based on system load, using a **burst-and-resume pattern**:
+
+### How It Works
+
+1. **Starts with concurrency=1** and runs a burst
+2. **Monitors system load** (1-minute average) after each burst completes
+3. **Adjusts concurrency** relative to the load threshold:
+   - **Double** when load < threshold (default: 1.0)
+   - **Halve** when load > threshold
+   - **Bounded to [1, 16]** in all cases
+4. **Resumes** the run with updated concurrency (skips completed tasks)
+
+The burst-and-resume pattern leverages terminal-bench's native resume capability. Each burst runs to completion with no mid-task interruption, ensuring a clean Docker container lifecycle.
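+
+The adjustment rule between bursts reduces to roughly the following sketch (simplified from `adaptive_bench.py`; `next_concurrency` is an illustrative helper, not a function exported by the module):
+
+```python
+import os
+
+MIN_CONCURRENT, MAX_CONCURRENT = 1, 16
+
+
+def next_concurrency(current: int, load_threshold: float = 1.0) -> int:
+    """Double below the load threshold, halve above it, clamp to [1, 16]."""
+    load = os.getloadavg()[0]  # 1-minute load average
+    if load < load_threshold:
+        return min(current * 2, MAX_CONCURRENT)
+    if load > load_threshold:
+        return max(current // 2, MIN_CONCURRENT)
+    return current
+```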
+
+### Configuration
+
+```bash
+# Adjust load threshold (default: 1.0)
+TB_LOAD_THRESHOLD=2.0 make benchmark-terminal
+
+# Faster adjustments (default: 60s between bursts)
+TB_CHECK_INTERVAL=30 make benchmark-terminal
+
+# Sample 5 tasks with adaptive concurrency
+TB_SAMPLE_SIZE=5 make benchmark-terminal
+```
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `TB_LOAD_THRESHOLD` | 1.0 | Load average threshold for adjusting concurrency |
+| `TB_CHECK_INTERVAL` | 60 | Seconds to wait between bursts |
+
+### Tradeoffs
+
+- ✅ Automatically finds a workable concurrency for the hardware
+- ✅ Prevents system overload
+- ✅ Clean container lifecycle (no mid-task kills)
+- ✅ Bounded to [1, 16] for safety
+- ⚠️ Burst overhead (~2-5s, negligible for 6+ min avg tasks)
+- ⚠️ Adjustment latency = burst duration + check interval
+
 ## Files

 - `cmux_agent.py`: Main agent adapter implementing Terminal-Bench's agent interface
@@ -106,3 +155,5 @@ Based on analysis of the Oct 30 nightly run (15-minute timeout):
 - `cmux_payload.py`: Helper to package cmux app for containerized execution
 - `cmux_setup.sh.j2`: Jinja2 template for agent installation script
 - `sample_tasks.py`: Utility to randomly sample tasks from dataset
+- `adaptive_bench.py`: Adaptive concurrency wrapper using the burst-and-resume pattern
+- `adaptive_bench_test.py`: Unit tests for adaptive_bench.py
diff --git a/benchmarks/terminal_bench/adaptive_bench.py b/benchmarks/terminal_bench/adaptive_bench.py
new file mode 100755
index 000000000..8d47e1054
--- /dev/null
+++ b/benchmarks/terminal_bench/adaptive_bench.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+"""
+Adaptive concurrency wrapper for terminal-bench using the burst-and-resume pattern.
+
+Runs terminal-bench in bursts with adjustable concurrency, using tb's native
+resume capability to skip completed tasks between bursts.
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import time
+from pathlib import Path
+from typing import Optional
+
+
+class AdaptiveBench:
+    """
+    Adaptive concurrency wrapper for terminal-bench.
+
+    Concurrency is automatically bounded to [1, 16] to keep resource usage
+    predictable across different hardware configurations.
+ """ + + MIN_CONCURRENT = 1 + MAX_CONCURRENT = 16 + + def __init__( + self, + load_threshold: float, + check_interval: int, + runs_dir: Path, + tb_args: list[str], + ): + self.load_threshold = load_threshold + self.check_interval = check_interval + self.runs_dir = runs_dir + self.tb_args = tb_args + self.current_concurrent = self.MIN_CONCURRENT + self.run_id: Optional[str] = None + self.burst_count = 0 + + def get_load_avg(self) -> float: + """Get 1-minute load average.""" + return os.getloadavg()[0] + + def get_run_status(self) -> dict: + """Get status of current run by parsing results.json and tb.lock.""" + if not self.run_id: + return {"total": 0, "completed": 0, "incomplete": 0} + + try: + # Parse tb.lock to get task count + lock_path = self.runs_dir / self.run_id / "tb.lock" + if lock_path.exists(): + with open(lock_path) as f: + lock_data = json.load(f) + total_tasks = len(lock_data.get("dataset", {}).get("task_ids", [])) + else: + total_tasks = 0 + + # Count completed tasks from results.json + results_path = self.runs_dir / self.run_id / "results.json" + completed = 0 + if results_path.exists(): + with open(results_path) as f: + results_data = json.load(f) + # Count unique task_ids in results + completed = len( + set(r["task_id"] for r in results_data.get("results", [])) + ) + + return { + "total": total_tasks, + "completed": completed, + "incomplete": max(0, total_tasks - completed), + } + except Exception as e: + print(f"⚠️ Error getting run status: {e}") + return {"total": 0, "completed": 0, "incomplete": 0} + + def adjust_concurrency(self) -> bool: + """Check load and adjust concurrency. Returns True if changed.""" + load = self.get_load_avg() + old_concurrent = self.current_concurrent + + if load < self.load_threshold and self.current_concurrent < self.MAX_CONCURRENT: + self.current_concurrent = min( + self.current_concurrent * 2, self.MAX_CONCURRENT + ) + elif ( + load > self.load_threshold and self.current_concurrent > self.MIN_CONCURRENT + ): + self.current_concurrent = max( + self.current_concurrent // 2, self.MIN_CONCURRENT + ) + + if self.current_concurrent != old_concurrent: + print( + f"📊 Load: {load:.2f} (threshold: {self.load_threshold}) → " + f"Concurrency: {old_concurrent} → {self.current_concurrent}" + ) + return True + + print(f"📊 Load: {load:.2f} (threshold: {self.load_threshold}) → No change") + return False + + def run_burst(self) -> int: + """Run a single burst of terminal-bench. 
+        self.burst_count += 1
+
+        if self.burst_count == 1:
+            # First burst - create new run
+            cmd = [
+                "uvx",
+                "terminal-bench",
+                "run",
+                "--n-concurrent",
+                str(self.current_concurrent),
+                "--output-path",
+                str(self.runs_dir),
+                *self.tb_args,
+            ]
+            print(
+                f"🚀 Burst #{self.burst_count}: Starting NEW run with "
+                f"concurrency={self.current_concurrent}"
+            )
+        else:
+            # Subsequent bursts - update tb.lock BEFORE resume
+            # This ensures the resume command picks up the new concurrency
+            self._update_lock_concurrency()
+
+            # Resume existing run
+            cmd = [
+                "uvx",
+                "terminal-bench",
+                "runs",
+                "resume",
+                "--run-id",
+                self.run_id,
+                "--runs-dir",
+                str(self.runs_dir),
+            ]
+            print(
+                f"🔄 Burst #{self.burst_count}: Resuming run {self.run_id} "
+                f"with concurrency={self.current_concurrent}"
+            )
+
+        print(f"   Command: {' '.join(cmd)}")
+        burst_start = time.time()
+
+        # Run terminal-bench
+        result = subprocess.run(cmd, env=os.environ.copy())
+
+        burst_duration = time.time() - burst_start
+
+        # Capture run_id from first burst
+        if self.burst_count == 1 and result.returncode == 0:
+            # Find most recent run directory
+            if self.runs_dir.exists():
+                run_dirs = [
+                    d
+                    for d in self.runs_dir.iterdir()
+                    if d.is_dir() and (d / "tb.lock").exists()
+                ]
+                if run_dirs:
+                    # Sort by modification time and take most recent
+                    self.run_id = sorted(run_dirs, key=lambda p: p.stat().st_mtime)[
+                        -1
+                    ].name
+                    print(f"📝 Captured run_id: {self.run_id}")
+
+        print(f"⏱️ Burst #{self.burst_count} completed in {burst_duration:.1f}s")
+
+        return result.returncode
+
+    def _update_lock_concurrency(self):
+        """Update n_concurrent_trials in tb.lock for next resume."""
+        lock_path = self.runs_dir / self.run_id / "tb.lock"
+        if not lock_path.exists():
+            return
+
+        try:
+            with open(lock_path, "r") as f:
+                lock_data = json.load(f)
+
+            # Update concurrency in lock file
+            if "run_config" in lock_data:
+                lock_data["run_config"]["n_concurrent_trials"] = self.current_concurrent
+
+            with open(lock_path, "w") as f:
+                json.dump(lock_data, f, indent=2)
+
+            print(f"   Updated tb.lock with concurrency={self.current_concurrent}")
+        except Exception as e:
+            print(f"⚠️ Could not update tb.lock: {e}")
+
+    def run(self):
+        """Main loop: run bursts with adaptive concurrency."""
+        try:
+            while True:
+                # Run burst with current concurrency
+                exit_code = self.run_burst()
+
+                if exit_code != 0:
+                    print(f"❌ Terminal-bench exited with code {exit_code}")
+                    return exit_code
+
+                # Check if we're done
+                status = self.get_run_status()
+                print(
+                    f"📈 Progress: {status['completed']}/{status['total']} tasks "
+                    f"({status['incomplete']} remaining)"
+                )
+
+                if status["incomplete"] == 0:
+                    print("✅ All tasks completed!")
+                    return 0
+
+                # Wait before next burst and potentially adjust concurrency
+                print(f"⏸️ Waiting {self.check_interval}s before next burst...")
+                time.sleep(self.check_interval)
+                self.adjust_concurrency()
+
+        except KeyboardInterrupt:
+            print("\n⚠️ Received interrupt, stopping...")
+            return 130
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Run terminal-bench with adaptive concurrency (auto-scales 1-16 based on load)"
+    )
+    parser.add_argument(
+        "--load-threshold",
+        type=float,
+        default=1.0,
+        help="Load average threshold for adjusting concurrency (default: 1.0)",
+    )
+    parser.add_argument(
+        "--check-interval",
+        type=int,
+        default=60,
+        help="Seconds between bursts (default: 60)",
+    )
+    parser.add_argument(
+        "--runs-dir",
+        type=Path,
+        default=Path("runs"),
+        help="Directory for run outputs (default: runs)",
runs)", + ) + parser.add_argument( + "tb_args", + nargs=argparse.REMAINDER, + help="Arguments to pass to terminal-bench run", + ) + + args = parser.parse_args() + + # Strip leading '--' from tb_args if present + tb_args = args.tb_args + if tb_args and tb_args[0] == "--": + tb_args = tb_args[1:] + + bench = AdaptiveBench( + load_threshold=args.load_threshold, + check_interval=args.check_interval, + runs_dir=args.runs_dir, + tb_args=tb_args, + ) + + sys.exit(bench.run()) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/terminal_bench/adaptive_bench_test.py b/benchmarks/terminal_bench/adaptive_bench_test.py new file mode 100644 index 000000000..f15bffff2 --- /dev/null +++ b/benchmarks/terminal_bench/adaptive_bench_test.py @@ -0,0 +1,269 @@ +"""Tests for adaptive_bench.py""" + +import json +import os +from pathlib import Path +from unittest.mock import MagicMock, mock_open, patch + +import pytest + +from adaptive_bench import AdaptiveBench + + +class TestAdaptiveBench: + """Test suite for AdaptiveBench.""" + + def test_init(self): + """Test AdaptiveBench initialization.""" + bench = AdaptiveBench( + load_threshold=2.0, + check_interval=30, + max_concurrent=8, + runs_dir=Path("test_runs"), + tb_args=["--dataset", "test"], + ) + + assert bench.load_threshold == 2.0 + assert bench.check_interval == 30 + assert bench.max_concurrent == 8 + assert bench.runs_dir == Path("test_runs") + assert bench.tb_args == ["--dataset", "test"] + assert bench.current_concurrent == 1 + assert bench.run_id is None + assert bench.burst_count == 0 + + @patch("adaptive_bench.os.getloadavg") + def test_get_load_avg(self, mock_getloadavg): + """Test getting load average.""" + mock_getloadavg.return_value = (2.5, 2.0, 1.5) + bench = AdaptiveBench( + load_threshold=1.0, + check_interval=60, + max_concurrent=16, + runs_dir=Path("runs"), + tb_args=[], + ) + + load = bench.get_load_avg() + assert load == 2.5 + mock_getloadavg.assert_called_once() + + @patch("adaptive_bench.os.getloadavg") + def test_adjust_concurrency_increase(self, mock_getloadavg): + """Test concurrency increases when load is low.""" + mock_getloadavg.return_value = (0.5, 0.5, 0.5) + bench = AdaptiveBench( + load_threshold=1.0, + check_interval=60, + max_concurrent=16, + runs_dir=Path("runs"), + tb_args=[], + ) + + bench.current_concurrent = 2 + changed = bench.adjust_concurrency() + + assert changed is True + assert bench.current_concurrent == 4 # Doubled + + @patch("adaptive_bench.os.getloadavg") + def test_adjust_concurrency_decrease(self, mock_getloadavg): + """Test concurrency decreases when load is high.""" + mock_getloadavg.return_value = (2.0, 2.0, 2.0) + bench = AdaptiveBench( + load_threshold=1.0, + check_interval=60, + max_concurrent=16, + runs_dir=Path("runs"), + tb_args=[], + ) + + bench.current_concurrent = 8 + changed = bench.adjust_concurrency() + + assert changed is True + assert bench.current_concurrent == 4 # Halved + + @patch("adaptive_bench.os.getloadavg") + def test_adjust_concurrency_no_change(self, mock_getloadavg): + """Test concurrency stays same when load is at threshold.""" + mock_getloadavg.return_value = (1.0, 1.0, 1.0) + bench = AdaptiveBench( + load_threshold=1.0, + check_interval=60, + max_concurrent=16, + runs_dir=Path("runs"), + tb_args=[], + ) + + bench.current_concurrent = 4 + changed = bench.adjust_concurrency() + + assert changed is False + assert bench.current_concurrent == 4 + + @patch("adaptive_bench.os.getloadavg") + def test_adjust_concurrency_respects_max(self, mock_getloadavg): + """Test 
+        mock_getloadavg.return_value = (0.1, 0.1, 0.1)
+        bench = AdaptiveBench(
+            load_threshold=1.0,
+            check_interval=60,
+            runs_dir=Path("runs"),
+            tb_args=[],
+        )
+
+        bench.current_concurrent = bench.MAX_CONCURRENT
+        changed = bench.adjust_concurrency()
+
+        assert changed is False
+        assert bench.current_concurrent == bench.MAX_CONCURRENT  # Stays at max
+
+    @patch("adaptive_bench.os.getloadavg")
+    def test_adjust_concurrency_respects_min(self, mock_getloadavg):
+        """Test concurrency doesn't go below 1."""
+        mock_getloadavg.return_value = (5.0, 5.0, 5.0)
+        bench = AdaptiveBench(
+            load_threshold=1.0,
+            check_interval=60,
+            runs_dir=Path("runs"),
+            tb_args=[],
+        )
+
+        bench.current_concurrent = 1
+        changed = bench.adjust_concurrency()
+
+        assert changed is False
+        assert bench.current_concurrent == 1  # Stays at min
+
+    def test_get_run_status_no_run_id(self):
+        """Test get_run_status returns zeros when no run_id."""
+        bench = AdaptiveBench(
+            load_threshold=1.0,
+            check_interval=60,
+            runs_dir=Path("runs"),
+            tb_args=[],
+        )
+
+        status = bench.get_run_status()
+        assert status == {"total": 0, "completed": 0, "incomplete": 0}
+
+    @patch("builtins.open", new_callable=mock_open)
+    @patch("pathlib.Path.exists")
+    def test_get_run_status_with_results(self, mock_exists, mock_file):
+        """Test get_run_status parses results correctly."""
+        bench = AdaptiveBench(
+            load_threshold=1.0,
+            check_interval=60,
+            runs_dir=Path("runs"),
+            tb_args=[],
+        )
+        bench.run_id = "test-run"
+
+        # Mock tb.lock with 5 tasks
+        tb_lock_data = {
+            "dataset": {"task_ids": ["task1", "task2", "task3", "task4", "task5"]}
+        }
+
+        # Mock results.json with 3 completed tasks
+        results_data = {
+            "results": [
+                {"task_id": "task1", "resolved": True},
+                {"task_id": "task2", "resolved": False},
+                {"task_id": "task3", "resolved": True},
+            ]
+        }
+
+        # The patched Path.exists is called with no arguments, so a plain
+        # return value is enough to make both files "exist"
+        mock_exists.return_value = True
+
+        def open_side_effect(path, *args, **kwargs):
+            if "tb.lock" in str(path):
+                return mock_open(read_data=json.dumps(tb_lock_data)).return_value
+            elif "results.json" in str(path):
+                return mock_open(read_data=json.dumps(results_data)).return_value
+            return mock_open().return_value
+
+        mock_file.side_effect = open_side_effect
+
+        status = bench.get_run_status()
+
+        assert status["total"] == 5
+        assert status["completed"] == 3
+        assert status["incomplete"] == 2
+
+    @patch("adaptive_bench.subprocess.run")
+    @patch("adaptive_bench.time.time")
+    def test_run_burst_first_burst(self, mock_time, mock_subprocess):
+        """Test first burst creates new run."""
+        mock_time.side_effect = [0, 10]  # Start and end time
+        mock_subprocess.return_value = MagicMock(returncode=0)
+
+        bench = AdaptiveBench(
+            load_threshold=1.0,
+            check_interval=60,
+            runs_dir=Path("runs"),
+            tb_args=["--dataset", "test"],
+        )
+
+        with patch("pathlib.Path.exists") as mock_exists:
+            mock_exists.return_value = False
+
+            exit_code = bench.run_burst()
+
+        assert exit_code == 0
+        assert bench.burst_count == 1
+
+        # Verify command
+        call_args = mock_subprocess.call_args
+        cmd = call_args[0][0]
+        assert cmd[0] == "uvx"
+        assert cmd[1] == "terminal-bench"
+        assert cmd[2] == "run"
+        assert "--n-concurrent" in cmd
+        assert "1" in cmd  # Initial concurrency
+        assert "--dataset" in cmd
+        assert "test" in cmd
+
+    @patch("builtins.open", new_callable=mock_open)
+    @patch("pathlib.Path.exists")
+    def test_update_lock_concurrency(self, mock_exists, mock_file):
+        """Test updating tb.lock with new concurrency."""
+        bench = AdaptiveBench(
+            load_threshold=1.0,
+            check_interval=60,
+            runs_dir=Path("runs"),
+            tb_args=[],
+        )
+        bench.run_id = "test-run"
+        bench.current_concurrent = 4
+
+        mock_exists.return_value = True
+
+        lock_data = {"run_config": {"n_concurrent_trials": 1, "other_field": "value"}}
+
+        # Setup mock to return lock_data on read
+        mock_file.return_value.read.return_value = json.dumps(lock_data)
+
+        bench._update_lock_concurrency()
+
+        # json.dump writes in small chunks; join every write call to recover
+        # the full payload before parsing it back
+        written_data = "".join(
+            call.args[0] for call in mock_file.return_value.write.call_args_list
+        )
+        assert written_data, "expected tb.lock to be rewritten"
+
+        written_lock = json.loads(written_data)
+        assert written_lock["run_config"]["n_concurrent_trials"] == 4
+        assert written_lock["run_config"]["other_field"] == "value"