1+ #[ cfg( test) ]
2+ mod tests;
3+
14use std:: path:: { Path , PathBuf } ;
25use std:: process:: { Command , Stdio } ;
36
@@ -165,7 +168,175 @@ pub fn get_closest_merge_commit(
165168 Ok ( output_result ( & mut git) ?. trim ( ) . to_owned ( ) )
166169}
167170
/// Represents the result of checking whether a set of paths
/// has been modified locally or not.
///
/// Both variants carry the SHA of the upstream commit that the check was
/// performed against.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PathFreshness {
    /// Artifacts should be downloaded from this upstream commit,
    /// there are no local modifications.
    LastModifiedUpstream { upstream: String },
    /// There are local modifications to a certain set of paths.
    /// "Local" essentially means "not-upstream" here.
    /// `upstream` is the latest upstream merge commit that made modifications to the
    /// set of paths.
    HasLocalModifications { upstream: String },
}
184+
185+ /// This function figures out if a set of paths was last modified upstream or
186+ /// if there are some local modifications made to them.
187+ ///
188+ /// It can be used to figure out if we should download artifacts from CI or rather
189+ /// build them locally.
190+ ///
191+ /// `target_paths` should be a non-empty slice of paths (relative to `git_dir` or the
192+ /// current working directory) whose modifications would invalidate the artifact.
193+ ///
194+ /// The function behaves differently in CI and outside CI.
195+ ///
196+ /// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
197+ /// top of the local master branch.
198+ /// If not, we try to find the most recent upstream commit (which we assume are commits
199+ /// made by bors) that modified `target_paths`.
200+ /// We don't want to simply take the latest master commit to avoid changing the output of
201+ /// this function frequently after rebasing on the latest master branch even if `target_paths`
202+ /// were not modified upstream in the meantime. In that case we would be redownloading CI
203+ /// artifacts unnecessarily.
204+ ///
205+ /// - In CI, we always fetch only a single parent merge commit, so we do not have access
206+ /// to the full git history.
207+ /// Luckily, we only need to distinguish between two situations. The first is that the current
208+ /// PR made modifications to `target_paths`. If not, then we simply take the latest upstream
209+ /// commit, because on CI there is no need to avoid redownloading.
210+ pub fn check_path_modifications (
211+ git_dir : Option < & Path > ,
212+ config : & GitConfig < ' _ > ,
213+ target_paths : & [ & str ] ,
214+ ci_env : CiEnv ,
215+ ) -> Result < PathFreshness , String > {
216+ assert ! ( !target_paths. is_empty( ) ) ;
217+ for path in target_paths {
218+ assert ! ( Path :: new( path. trim_start_matches( ":!" ) ) . is_relative( ) ) ;
219+ }
220+
221+ let upstream_sha = if matches ! ( ci_env, CiEnv :: GitHubActions ) {
222+ // Here the situation is different for PR CI and try/auto CI.
223+ // For PR CI, we have the following history:
224+ // <merge commit made by GitHub>
225+ // 1-N PR commits
226+ // upstream merge commit made by bors
227+ //
228+ // For try/auto CI, we have the following history:
229+ // <**non-upstream** merge commit made by bors>
230+ // 1-N PR commits
231+ // upstream merge commit made by bors
232+ //
233+ // But on both cases, HEAD should be a merge commit.
234+ // So if HEAD contains modifications of `target_paths`, our PR has modified
235+ // them. If not, we can use the only available upstream commit for downloading
236+ // artifacts.
237+
238+ // Do not include HEAD, as it is never an upstream commit
239+ get_closest_upstream_commit ( git_dir, config, ci_env) ?
240+ } else {
241+ // Outside CI, we have to find the most recent upstream commit that
242+ // modified the set of paths, to have an upstream reference.
243+ let upstream_sha = get_latest_commit_that_modified_files (
244+ git_dir,
245+ target_paths,
246+ config. git_merge_commit_email ,
247+ ) ?;
248+ let Some ( upstream_sha) = upstream_sha else {
249+ eprintln ! ( "No upstream commit that modified paths {target_paths:?} found." ) ;
250+ eprintln ! ( "Try to fetch more upstream history." ) ;
251+ return Err ( "No upstream commit with modifications found" . to_string ( ) ) ;
252+ } ;
253+ upstream_sha
254+ } ;
255+
256+ if has_changed_since ( git_dir, & upstream_sha, target_paths) {
257+ Ok ( PathFreshness :: HasLocalModifications { upstream : upstream_sha } )
258+ } else {
259+ Ok ( PathFreshness :: LastModifiedUpstream { upstream : upstream_sha } )
260+ }
261+ }
262+
/// Reports whether any of the given `paths` differ from the `base` commit.
///
/// Runs `git diff-index --quiet <base> -- <paths...>`, inside `git_dir` when one is
/// provided, and returns `true` whenever that command does not exit successfully.
///
/// # Panics
///
/// Panics if the `git` process cannot be run.
pub fn has_changed_since(git_dir: Option<&Path>, base: &str, paths: &[&Path]) -> bool {
    let mut cmd = Command::new("git");
    if let Some(dir) = git_dir {
        cmd.current_dir(dir);
    }

    cmd.arg("diff-index").arg("--quiet").arg(base).arg("--");
    cmd.args(paths);

    // Exit code 0 means that there are no changes, exit code 1 means that
    // some changes were detected.
    let status = cmd.status().expect("cannot run git diff-index");
    !status.success()
}
277+
278+ /// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
279+ /// If `author` is `Some`, only considers commits made by that author.
280+ fn get_latest_commit_that_modified_files (
281+ git_dir : Option < & Path > ,
282+ target_paths : & [ & str ] ,
283+ author : & str ,
284+ ) -> Result < Option < String > , String > {
285+ let mut git = Command :: new ( "git" ) ;
286+
287+ if let Some ( git_dir) = git_dir {
288+ git. current_dir ( git_dir) ;
289+ }
290+
291+ git. args ( [ "rev-list" , "-n1" , "--first-parent" , "HEAD" , "--author" , author] ) ;
292+
293+ if !target_paths. is_empty ( ) {
294+ git. arg ( "--" ) . args ( target_paths) ;
295+ }
296+ let output = output_result ( & mut git) ?. trim ( ) . to_owned ( ) ;
297+ if output. is_empty ( ) { Ok ( None ) } else { Ok ( Some ( output) ) }
298+ }
299+
300+ /// Returns the most recent commit found in the local history that should definitely
301+ /// exist upstream. We identify upstream commits by the e-mail of the commit author.
302+ ///
303+ /// If `include_head` is false, the HEAD (current) commit will be ignored and only
304+ /// its parents will be searched. This is useful for try/auto CI, where HEAD is
305+ /// actually a commit made by bors, although it is not upstream yet.
306+ fn get_closest_upstream_commit (
307+ git_dir : Option < & Path > ,
308+ config : & GitConfig < ' _ > ,
309+ env : CiEnv ,
310+ ) -> Result < String , String > {
311+ let mut git = Command :: new ( "git" ) ;
312+
313+ if let Some ( git_dir) = git_dir {
314+ git. current_dir ( git_dir) ;
315+ }
316+
317+ let base = match env {
318+ CiEnv :: None => "HEAD" ,
319+ CiEnv :: GitHubActions => {
320+ // On CI, we always have a merge commit at the tip.
321+ // We thus skip it, because although it can be creatd by
322+ // `config.git_merge_commit_email`, it should not be upstream.
323+ "HEAD^1"
324+ }
325+ } ;
326+ git. args ( [
327+ "rev-list" ,
328+ & format ! ( "--author={}" , config. git_merge_commit_email) ,
329+ "-n1" ,
330+ "--first-parent" ,
331+ & base,
332+ ] ) ;
333+
334+ Ok ( output_result ( & mut git) ?. trim ( ) . to_owned ( ) )
335+ }
336+
168337/// Returns the files that have been modified in the current branch compared to the master branch.
338+ /// This includes committed changes, uncommitted changes, and changes that are not even staged.
339+ ///
169340/// The `extensions` parameter can be used to filter the files by their extension.
170341/// Does not include removed files.
171342/// If `extensions` is empty, all files will be returned.
@@ -174,7 +345,7 @@ pub fn get_git_modified_files(
174345 git_dir : Option < & Path > ,
175346 extensions : & [ & str ] ,
176347) -> Result < Vec < String > , String > {
177- let merge_base = get_closest_merge_commit ( git_dir, config, & [ ] ) ?;
348+ let merge_base = get_closest_upstream_commit ( git_dir, config, CiEnv :: None ) ?;
178349
179350 let mut git = Command :: new ( "git" ) ;
180351 if let Some ( git_dir) = git_dir {
0 commit comments