@@ -65,8 +65,13 @@ use database::{
6565 CommitType , Connection , Pool ,
6666} ;
6767
68+ /// Directory used to cache downloaded Rust toolchains on disk.
6869const TOOLCHAIN_CACHE_DIRECTORY : & str = "cache" ;
6970
71+ /// Maximum allowed number of toolchains in the toolchain cache directory.
72+ /// If the directory contains more toolchains than this, it is purged.
73+ const TOOLCHAIN_CACHE_MAX_TOOLCHAINS : usize = 30 ;
74+
7075fn n_normal_benchmarks_remaining ( n : usize ) -> String {
7176 let suffix = if n == 1 { "" } else { "s" } ;
7277 format ! ( "{n} normal benchmark{suffix} remaining" )
@@ -1269,15 +1274,8 @@ fn main_result() -> anyhow::Result<i32> {
12691274 }
12701275
12711276 Commands :: InstallNext { codegen_backends } => {
1272- let last_sha = Command :: new ( "git" )
1273- . arg ( "ls-remote" )
1274- . arg ( "https://github.com/rust-lang/rust.git" )
1275- . arg ( "master" )
1276- . output ( )
1277- . unwrap ( ) ;
1278- let last_sha = String :: from_utf8 ( last_sha. stdout ) . expect ( "utf8" ) ;
1279- let last_sha = last_sha. split_whitespace ( ) . next ( ) . expect ( & last_sha) ;
1280- let commit = get_commit_or_fake_it ( last_sha) . expect ( "success" ) ;
1277+ let last_sha = get_latest_sha ( "https://github.com/rust-lang/rust" ) . unwrap ( ) ;
1278+ let commit = get_commit_or_fake_it ( & last_sha) . expect ( "success" ) ;
12811279
12821280 let rt = build_async_runtime ( ) ;
12831281 let mut sysroot = rt
@@ -1434,6 +1432,8 @@ async fn run_job_queue_benchmarks(
14341432 all_compile_benchmarks : Vec < Benchmark > ,
14351433 check_git_sha : bool ,
14361434) -> anyhow:: Result < ( ) > {
1435+ let _ = tidy_toolchain_cache_dir ( ) ;
1436+
14371437 let mut last_request_tag = None ;
14381438
14391439 while let Some ( ( benchmark_job, artifact_id) ) = conn
@@ -1444,20 +1444,25 @@ async fn run_job_queue_benchmarks(
14441444 )
14451445 . await ?
14461446 {
1447+ // Are we benchmarking a different benchmark request than in the previous iteration of the
1448+ // loop?
1449+ let is_new_request = last_request_tag. is_some ( )
1450+ && last_request_tag. as_deref ( ) != Some ( benchmark_job. request_tag ( ) ) ;
1451+ if is_new_request {
1452+ let _ = tidy_toolchain_cache_dir ( ) ;
1453+ }
1454+
14471455 // Here we check if we should update our commit SHA, if rustc-perf has been updated.
14481456 // We only check for updates when we switch *benchmark requests*, not *benchmark jobs*,
14491457 // to avoid changing code in the middle of benchmarking the same request.
14501458 // Note that if an update happens, the job that we have just dequeued will have its deque
14511459 // counter increased. But since updates are relatively rare, that shouldn't be a big deal,
14521460 // it will be dequeued again when the collector starts again.
1453- if check_git_sha
1454- && last_request_tag. is_some ( )
1455- && last_request_tag. as_deref ( ) != Some ( benchmark_job. request_tag ( ) )
1456- && needs_git_update ( collector)
1457- {
1461+ if check_git_sha && is_new_request && needs_git_update ( collector) {
14581462 log:: warn!( "Exiting collector to update itself from git." ) ;
14591463 return Ok ( ( ) ) ;
14601464 }
1465+
14611466 last_request_tag = Some ( benchmark_job. request_tag ( ) . to_string ( ) ) ;
14621467
14631468 log:: info!( "Dequeued job {benchmark_job:?}, artifact_id {artifact_id:?}" ) ;
@@ -1523,28 +1528,32 @@ async fn run_job_queue_benchmarks(
15231528 Ok ( ( ) )
15241529}
15251530
1531+ /// Check the toolchain cache directory and delete it if it grows too large.
1532+ /// Currently, we just assume that "too large" means "has more than N toolchains".
1533+ fn tidy_toolchain_cache_dir ( ) -> std:: io:: Result < ( ) > {
1534+ let dir_count = Path :: new ( TOOLCHAIN_CACHE_DIRECTORY )
1535+ . read_dir ( ) ?
1536+ . filter_map ( |e| e. ok ( ) )
1537+ . filter_map ( |d| d. file_type ( ) . ok ( ) )
1538+ . filter ( |t| t. is_dir ( ) )
1539+ . count ( ) ;
1540+ if dir_count > TOOLCHAIN_CACHE_MAX_TOOLCHAINS {
1541+ log:: warn!( "Purging toolchain cache directory at {TOOLCHAIN_CACHE_DIRECTORY}" ) ;
1542+ // Just remove the whole directory, to avoid having to figure out which toolchains are old
1543+ std:: fs:: remove_dir_all ( TOOLCHAIN_CACHE_DIRECTORY ) ?;
1544+ }
1545+ Ok ( ( ) )
1546+ }
1547+
15261548/// Returns true if the commit SHA of collector does not match the latest commit SHA of the master
15271549/// branch of https://github.com/rust-lang/rustc-perf.
15281550fn needs_git_update ( collector : & CollectorConfig ) -> bool {
15291551 let Some ( commit_sha) = collector. commit_sha ( ) else {
15301552 return false ;
15311553 } ;
15321554
1533- let mut cmd = Command :: new ( "git" ) ;
1534- cmd. arg ( "ls-remote" )
1535- . arg ( "https://github.com/rust-lang/rustc-perf" )
1536- . arg ( "HEAD" ) ;
1537- let upstream_sha = match command_output ( & mut cmd) {
1538- Ok ( output) => String :: from_utf8 ( output. stdout )
1539- . unwrap ( )
1540- . split_whitespace ( )
1541- . next ( )
1542- . unwrap ( )
1543- . to_string ( ) ,
1544- Err ( error) => {
1545- log:: error!( "Cannot determine latest SHA of rustc-perf: {error:?}" ) ;
1546- return false ;
1547- }
1555+ let Ok ( upstream_sha) = get_latest_sha ( "https://github.com/rust-lang/rustc-perf" ) else {
1556+ return false ;
15481557 } ;
15491558 if commit_sha != upstream_sha {
15501559 log:: warn!(
@@ -1556,6 +1565,23 @@ fn needs_git_update(collector: &CollectorConfig) -> bool {
15561565 }
15571566}
15581567
1568+ /// Returns the latest known sha of the default branch of the specified `repo`.
1569+ fn get_latest_sha ( repo : & str ) -> anyhow:: Result < String > {
1570+ let mut cmd = Command :: new ( "git" ) ;
1571+ cmd. arg ( "ls-remote" ) . arg ( repo) . arg ( "HEAD" ) ;
1572+ match command_output ( & mut cmd) {
1573+ Ok ( output) => Ok ( String :: from_utf8 ( output. stdout ) ?
1574+ . split_whitespace ( )
1575+ . next ( )
1576+ . unwrap ( )
1577+ . to_string ( ) ) ,
1578+ Err ( error) => {
1579+ log:: error!( "Cannot determine latest SHA of {repo}: {error:?}" ) ;
1580+ Err ( error)
1581+ }
1582+ }
1583+ }
1584+
15591585/// Error that happened during benchmarking of a job.
15601586enum BenchmarkJobError {
15611587 /// The error is non-recoverable.
@@ -1606,8 +1632,6 @@ async fn run_benchmark_job(
16061632 } ;
16071633 // Avoid redownloading the same sysroot multiple times for different jobs, even
16081634 // across collector restarts.
1609-
1610- // TODO: Periodically clear the cache directory to avoid running out of disk space.
16111635 sysroot. preserve ( ) ;
16121636 Toolchain :: from_sysroot ( & sysroot, commit. sha . clone ( ) )
16131637 }
0 commit comments