Skip to content

Commit 3e1db73

Browse files
committed
Reconnect to the database when a transient error occurs
1 parent cd926b0 commit 3e1db73

File tree

1 file changed: 6 additions (+6) and 4 deletions (-4)

1 file changed: 6 additions (+6) and 4 deletions (-4)

collector/src/bin/collector.rs

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1378,6 +1378,7 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
13781378
get_compile_benchmarks(&compile_benchmark_dir, CompileBenchmarkFilter::All)?;
13791379

13801380
rt.block_on(run_job_queue_benchmarks(
1381+
pool,
13811382
conn,
13821383
&collector_config,
13831384
benchmarks,
@@ -1392,15 +1393,14 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
13921393
const MAX_JOB_FAILS: u32 = 3;
13931394

13941395
async fn run_job_queue_benchmarks(
1396+
pool: Pool,
13951397
mut conn: Box<dyn Connection>,
13961398
collector: &CollectorConfig,
13971399
all_compile_benchmarks: Vec<Benchmark>,
13981400
) -> anyhow::Result<()> {
1399-
let conn = conn.as_mut();
14001401
conn.update_collector_heartbeat(collector.name()).await?;
14011402

14021403
// TODO: check collector SHA vs site SHA
1403-
// TODO: reconnect to the DB if there was an error with the previous job
14041404
while let Some((benchmark_job, artifact_id)) = conn
14051405
.dequeue_benchmark_job(
14061406
collector.name(),
@@ -1411,7 +1411,7 @@ async fn run_job_queue_benchmarks(
14111411
{
14121412
log::info!("Dequeued job {benchmark_job:?}, artifact_id {artifact_id:?}");
14131413
let result = run_benchmark_job(
1414-
conn,
1414+
conn.as_mut(),
14151415
&benchmark_job,
14161416
artifact_id.clone(),
14171417
&all_compile_benchmarks,
@@ -1457,8 +1457,10 @@ async fn run_job_queue_benchmarks(
14571457
// There was some transient (i.e. I/O, network or database) error.
14581458
// Let's retry the job later, with some sleep
14591459
log::info!("Retrying after 30s...");
1460-
tokio::time::sleep(Duration::from_secs(3)).await;
14611460
tokio::time::sleep(Duration::from_secs(30)).await;
1461+
1462+
// Maybe there was a DB issue. Try to reconnect to the database.
1463+
conn = pool.connection().await;
14621464
}
14631465
}
14641466
}

0 commit comments

Comments
 (0)