1+ #[ cfg( test) ]
2+ mod tests;
3+
14use std:: path:: Path ;
25use std:: process:: { Command , Stdio } ;
36
@@ -167,7 +170,181 @@ pub fn get_closest_merge_commit(
167170 Ok ( output_result ( & mut git) ?. trim ( ) . to_owned ( ) )
168171}
169172
170- /// Returns the files that have been modified in the current branch compared to the last merge.
173+ /// Represents the result of checking whether a set of paths
174+ /// have been modified locally or not.
175+ #[ derive( PartialEq , Debug ) ]
176+ pub enum PathFreshness {
177+ /// Artifacts should be downloaded from this upstream commit,
178+ /// there are no local modifications.
179+ LastModifiedUpstream { upstream : String } ,
180+ /// There are local modifications to a certain set of paths.
181+ /// "Local" essentially means "not-upstream" here.
182+ /// `upstream` is the latest upstream merge commit that made modifications to the
183+ /// set of paths.
184+ HasLocalModifications { upstream : String } ,
185+ }
186+
187+ /// This function figures out if a set of paths was last modified upstream or
188+ /// if there are some local modifications made to them.
189+ ///
190+ /// It can be used to figure out if we should download artifacts from CI or rather
191+ /// build them locally.
192+ ///
193+ /// `target_paths` should be a non-empty slice of paths (git `pathspec`s) relative to `git_dir`
194+ /// or the current working directory whose modifications would invalidate the artifact.
195+ /// Each pathspec can also be a negative match, i.e. `:!foo`. This matches changes outside
196+ /// the `foo` directory.
197+ /// See https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec
198+ /// for how git `pathspec` works.
199+ ///
200+ /// The function behaves differently in CI and outside CI.
201+ ///
202+ /// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
203+ /// top of the local master branch.
204+ /// If not, we try to find the most recent upstream commit (which we assume are commits
205+ /// made by bors) that modified `target_paths`.
206+ /// We don't want to simply take the latest master commit to avoid changing the output of
207+ /// this function frequently after rebasing on the latest master branch even if `target_paths`
208+ /// were not modified upstream in the meantime. In that case we would be redownloading CI
209+ /// artifacts unnecessarily.
210+ ///
211+ /// - In CI, we always fetch only a single parent merge commit, so we do not have access
212+ /// to the full git history. Luckily, we only need to distinguish between two situations:
213+ /// 1) The current PR made modifications to `target_paths`.
214+ /// In that case, a build is typically necessary.
215+ /// 2) The current PR did not make modifications to `target_paths`.
216+ /// In that case we simply take the latest upstream commit, because on CI there is no need to avoid
217+ /// redownloading.
218+ pub fn check_path_modifications (
219+ git_dir : Option < & Path > ,
220+ config : & GitConfig < ' _ > ,
221+ target_paths : & [ & str ] ,
222+ ci_env : CiEnv ,
223+ ) -> Result < PathFreshness , String > {
224+ assert ! ( !target_paths. is_empty( ) ) ;
225+ for path in target_paths {
226+ assert ! ( Path :: new( path. trim_start_matches( ":!" ) ) . is_relative( ) ) ;
227+ }
228+
229+ let upstream_sha = if matches ! ( ci_env, CiEnv :: GitHubActions ) {
230+ // Here the situation is different for PR CI and try/auto CI.
231+ // For PR CI, we have the following history:
232+ // <merge commit made by GitHub>
233+ // 1-N PR commits
234+ // upstream merge commit made by bors
235+ //
236+ // For try/auto CI, we have the following history:
237+ // <**non-upstream** merge commit made by bors>
238+ // 1-N PR commits
239+ // upstream merge commit made by bors
240+ //
241+ // But on both cases, HEAD should be a merge commit.
242+ // So if HEAD contains modifications of `target_paths`, our PR has modified
243+ // them. If not, we can use the only available upstream commit for downloading
244+ // artifacts.
245+
246+ // Do not include HEAD, as it is never an upstream commit
247+ get_closest_upstream_commit ( git_dir, config, ci_env) ?
248+ } else {
249+ // Outside CI, we have to find the most recent upstream commit that
250+ // modified the set of paths, to have an upstream reference.
251+ let upstream_sha = get_latest_commit_that_modified_files (
252+ git_dir,
253+ target_paths,
254+ config. git_merge_commit_email ,
255+ ) ?;
256+ let Some ( upstream_sha) = upstream_sha else {
257+ eprintln ! ( "No upstream commit that modified paths {target_paths:?} found." ) ;
258+ eprintln ! ( "Try to fetch more upstream history." ) ;
259+ return Err ( "No upstream commit with modifications found" . to_string ( ) ) ;
260+ } ;
261+ upstream_sha
262+ } ;
263+
264+ if has_changed_since ( git_dir, & upstream_sha, target_paths) {
265+ Ok ( PathFreshness :: HasLocalModifications { upstream : upstream_sha } )
266+ } else {
267+ Ok ( PathFreshness :: LastModifiedUpstream { upstream : upstream_sha } )
268+ }
269+ }
270+
271+ /// Returns true if any of the passed `paths` have changed since the `base` commit.
272+ pub fn has_changed_since ( git_dir : Option < & Path > , base : & str , paths : & [ & str ] ) -> bool {
273+ let mut git = Command :: new ( "git" ) ;
274+
275+ if let Some ( git_dir) = git_dir {
276+ git. current_dir ( git_dir) ;
277+ }
278+
279+ git. args ( [ "diff-index" , "--quiet" , base, "--" ] ) . args ( paths) ;
280+
281+ // Exit code 0 => no changes
282+ // Exit code 1 => some changes were detected
283+ !git. status ( ) . expect ( "cannot run git diff-index" ) . success ( )
284+ }
285+
286+ /// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
287+ /// If `author` is `Some`, only considers commits made by that author.
288+ fn get_latest_commit_that_modified_files (
289+ git_dir : Option < & Path > ,
290+ target_paths : & [ & str ] ,
291+ author : & str ,
292+ ) -> Result < Option < String > , String > {
293+ let mut git = Command :: new ( "git" ) ;
294+
295+ if let Some ( git_dir) = git_dir {
296+ git. current_dir ( git_dir) ;
297+ }
298+
299+ git. args ( [ "rev-list" , "-n1" , "--first-parent" , "HEAD" , "--author" , author] ) ;
300+
301+ if !target_paths. is_empty ( ) {
302+ git. arg ( "--" ) . args ( target_paths) ;
303+ }
304+ let output = output_result ( & mut git) ?. trim ( ) . to_owned ( ) ;
305+ if output. is_empty ( ) { Ok ( None ) } else { Ok ( Some ( output) ) }
306+ }
307+
308+ /// Returns the most recent commit found in the local history that should definitely
309+ /// exist upstream. We identify upstream commits by the e-mail of the commit author.
310+ ///
311+ /// If `include_head` is false, the HEAD (current) commit will be ignored and only
312+ /// its parents will be searched. This is useful for try/auto CI, where HEAD is
313+ /// actually a commit made by bors, although it is not upstream yet.
314+ fn get_closest_upstream_commit (
315+ git_dir : Option < & Path > ,
316+ config : & GitConfig < ' _ > ,
317+ env : CiEnv ,
318+ ) -> Result < String , String > {
319+ let mut git = Command :: new ( "git" ) ;
320+
321+ if let Some ( git_dir) = git_dir {
322+ git. current_dir ( git_dir) ;
323+ }
324+
325+ let base = match env {
326+ CiEnv :: None => "HEAD" ,
327+ CiEnv :: GitHubActions => {
328+ // On CI, we always have a merge commit at the tip.
329+ // We thus skip it, because although it can be creatd by
330+ // `config.git_merge_commit_email`, it should not be upstream.
331+ "HEAD^1"
332+ }
333+ } ;
334+ git. args ( [
335+ "rev-list" ,
336+ & format ! ( "--author={}" , config. git_merge_commit_email) ,
337+ "-n1" ,
338+ "--first-parent" ,
339+ & base,
340+ ] ) ;
341+
342+ Ok ( output_result ( & mut git) ?. trim ( ) . to_owned ( ) )
343+ }
344+
345+ /// Returns the files that have been modified in the current branch compared to the master branch.
346+ /// This includes committed changes, uncommitted changes, and changes that are not even staged.
347+ ///
171348/// The `extensions` parameter can be used to filter the files by their extension.
172349/// Does not include removed files.
173350/// If `extensions` is empty, all files will be returned.
@@ -176,7 +353,7 @@ pub fn get_git_modified_files(
176353 git_dir : Option < & Path > ,
177354 extensions : & [ & str ] ,
178355) -> Result < Vec < String > , String > {
179- let merge_base = get_closest_merge_commit ( git_dir, config, & [ ] ) ?;
356+ let merge_base = get_closest_upstream_commit ( git_dir, config, CiEnv :: None ) ?;
180357
181358 let mut git = Command :: new ( "git" ) ;
182359 if let Some ( git_dir) = git_dir {
0 commit comments