22#include "bloom.h"
33#include "builtin.h"
44#include "commit-graph.h"
5+ #include "commit-slab.h"
56#include "commit.h"
67#include "config.h"
7- #include "environment.h"
88#include "diff.h"
99#include "diffcore.h"
1010#include "environment.h"
11+ #include "ewah/ewok.h"
1112#include "hashmap.h"
1213#include "hex.h"
13- #include "log-tree.h"
1414#include "object-name.h"
1515#include "object.h"
1616#include "parse-options.h"
17+ #include "prio-queue.h"
1718#include "quote.h"
1819#include "repository.h"
1920#include "revision.h"
2021
22+ /* Remember to update object flag allocation in object.h */
23+ #define PARENT1 (1u<<16) /* used instead of SEEN */
24+ #define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */
25+
/*
 * One path being tracked. Entries live in the `paths` hashmap of
 * struct last_modified and are looked up by comparing `path` with strcmp().
 */
2126struct last_modified_entry {
2227 struct hashmap_entry hashent ;
/* Object id that mark_path() compares against the id reported by a diff. */
2328 struct object_id oid ;
/* Key handed to bloom_filter_contains() when consulting a commit's Bloom filter. */
2429 struct bloom_key key ;
/*
 * Index of this path in `lm->all_paths`; also the bit position used for
 * this path in the per-commit bitmaps.
 */
30+ size_t diff_idx ;
2531 const char path [FLEX_ARRAY ];
2632};
2733
@@ -37,13 +43,45 @@ static int last_modified_entry_hashcmp(const void *unused UNUSED,
3743 return strcmp (ent1 -> path , path ? path : ent2 -> path );
3844}
3945
46+ /*
47+ * Hold a bitmap for each commit we're working with. In the bitmap, each bit
48+ * represents a path in `lm->all_paths` (bit position == the entry's `diff_idx`).
49+ * An active bit indicates the path still needs to be associated with a commit.
50+ */
51+ define_commit_slab (active_paths_for_commit , struct bitmap * );
52+
/*
 * State for one last-modified run.
 */
4053struct last_modified {
/* Map of struct last_modified_entry, one per path still awaiting a commit. */
4154 struct hashmap paths ;
/* Revision machinery; also supplies the repository, diff options and Bloom filter settings. */
4255 struct rev_info rev ;
4356 bool recursive ;
4457 bool show_trees ;
58+
/*
 * Flat array of the paths in `paths`, indexed by each entry's `diff_idx`.
 * The strings are owned by the hashmap entries; only the array itself is
 * freed in last_modified_release().
 */
59+ const char * * all_paths ;
/* Number of elements in `all_paths`. */
60+ size_t all_paths_nr ;
/* Per-commit bitmap of paths that are still active at that commit. */
61+ struct active_paths_for_commit active_paths ;
62+
63+ /* 'scratch' to avoid allocating a bitmap every process_parent() */
64+ struct bitmap * scratch ;
4565};
4666
67+ static struct bitmap * active_paths_for (struct last_modified * lm , struct commit * c )
68+ {
69+ struct bitmap * * bitmap = active_paths_for_commit_at (& lm -> active_paths , c );
70+ if (!* bitmap )
71+ * bitmap = bitmap_word_alloc (lm -> all_paths_nr / BITS_IN_EWORD + 1 );
72+
73+ return * bitmap ;
74+ }
75+
76+ static void active_paths_free (struct last_modified * lm , struct commit * c )
77+ {
78+ struct bitmap * * bitmap = active_paths_for_commit_at (& lm -> active_paths , c );
79+ if (* bitmap ) {
80+ bitmap_free (* bitmap );
81+ * bitmap = NULL ;
82+ }
83+ }
84+
4785static void last_modified_release (struct last_modified * lm )
4886{
4987 struct hashmap_iter iter ;
@@ -54,6 +92,8 @@ static void last_modified_release(struct last_modified *lm)
5492
5593 hashmap_clear_and_free (& lm -> paths , struct last_modified_entry , hashent );
5694 release_revisions (& lm -> rev );
95+
96+ free (lm -> all_paths );
5797}
5898
5999struct last_modified_callback_data {
@@ -146,7 +186,7 @@ static void mark_path(const char *path, const struct object_id *oid,
146186 * Is it arriving at a version of interest, or is it from a side branch
147187 * which did not contribute to the final state?
148188 */
149- if (!oideq (oid , & ent -> oid ))
189+ if (oid && !oideq (oid , & ent -> oid ))
150190 return ;
151191
152192 last_modified_emit (data -> lm , path , data -> commit );
@@ -196,7 +236,17 @@ static void last_modified_diff(struct diff_queue_struct *q,
196236 }
197237}
198238
199- static bool maybe_changed_path (struct last_modified * lm , struct commit * origin )
/*
 * Hand the path at bit `pos` from a commit to one of its parents: the bit
 * is cleared in the commit's bitmap `c` and then set in the parent's
 * bitmap `p`.
 */
static void pass_to_parent(struct bitmap *c, struct bitmap *p, size_t pos)
{
	bitmap_unset(c, pos);
	bitmap_set(p, pos);
}
246+
247+ static bool maybe_changed_path (struct last_modified * lm ,
248+ struct commit * origin ,
249+ struct bitmap * active )
200250{
201251 struct bloom_filter * filter ;
202252 struct last_modified_entry * ent ;
@@ -213,49 +263,212 @@ static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
213263 return true;
214264
215265 hashmap_for_each_entry (& lm -> paths , & iter , ent , hashent ) {
266+ if (active && !bitmap_get (active , ent -> diff_idx ))
267+ continue ;
268+
216269 if (bloom_filter_contains (filter , & ent -> key ,
217270 lm -> rev .bloom_filter_settings ))
218271 return true;
219272 }
220273 return false;
221274}
222275
276+ static void process_parent (struct last_modified * lm ,
277+ struct prio_queue * queue ,
278+ struct commit * c , struct bitmap * active_c ,
279+ struct commit * parent , int parent_i )
280+ {
281+ struct bitmap * active_p ;
282+
283+ repo_parse_commit (lm -> rev .repo , parent );
284+ active_p = active_paths_for (lm , parent );
285+
286+ /*
287+ * The first time entering this function for this commit (i.e. first parent)
288+ * see if Bloom filters will tell us it's worth to do the diff.
289+ */
290+ if (parent_i || maybe_changed_path (lm , c , active_c )) {
291+ diff_tree_oid (& parent -> object .oid ,
292+ & c -> object .oid , "" , & lm -> rev .diffopt );
293+ diffcore_std (& lm -> rev .diffopt );
294+ }
295+
296+ /*
297+ * Test each path for TREESAME-ness against the parent. If a path is
298+ * TREESAME, pass it on to this parent.
299+ *
300+ * First, collect all paths that are *not* TREESAME in 'scratch'.
301+ * Then, pass paths that *are* TREESAME and active to the parent.
302+ */
303+ for (int i = 0 ; i < diff_queued_diff .nr ; i ++ ) {
304+ struct diff_filepair * fp = diff_queued_diff .queue [i ];
305+ const char * path = fp -> two -> path ;
306+ struct last_modified_entry * ent =
307+ hashmap_get_entry_from_hash (& lm -> paths , strhash (path ), path ,
308+ struct last_modified_entry , hashent );
309+ if (ent ) {
310+ size_t k = ent -> diff_idx ;
311+ if (bitmap_get (active_c , k ))
312+ bitmap_set (lm -> scratch , k );
313+ }
314+ }
315+ for (size_t i = 0 ; i < lm -> all_paths_nr ; i ++ ) {
316+ if (bitmap_get (active_c , i ) && !bitmap_get (lm -> scratch , i ))
317+ pass_to_parent (active_c , active_p , i );
318+ }
319+
320+ /*
321+ * If parent has any active paths, put it on the queue (if not already).
322+ */
323+ if (!bitmap_is_empty (active_p ) && !(parent -> object .flags & PARENT1 )) {
324+ parent -> object .flags |= PARENT1 ;
325+ prio_queue_put (queue , parent );
326+ }
327+ if (!(parent -> object .flags & PARENT1 ))
328+ active_paths_free (lm , parent );
329+
330+ memset (lm -> scratch -> words , 0x0 , lm -> scratch -> word_alloc );
331+ diff_queue_clear (& diff_queued_diff );
332+ }
333+
/*
 * Drive the last-modified computation for the paths registered in `lm`.
 *
 * Diff output is routed to last_modified_diff() via the callback format.
 * The commits in lm->rev.commits are split over two priority queues:
 * commits flagged BOTTOM go to `not_queue` (and get PARENT2), all others go
 * to `queue` (and get PARENT1) with every path marked active. Commits are
 * then popped from `queue`; for each one the '--not' side is advanced, the
 * parents are handled by process_parent(), and any path still active
 * afterwards is reported through mark_path() for that commit. A commit that
 * carries PARENT2, or that is popped after `max_count` commits, is diffed
 * against the empty tree instead.
 *
 * Hits BUG() if paths are left in lm->paths once the queue is drained.
 * Returns 0.
 */
223334static int last_modified_run (struct last_modified * lm )
224335{
336+ int max_count , queue_popped = 0 ;
337+ struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
338+ struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date };
339+ struct commit_list * list ;
225340 struct last_modified_callback_data data = { .lm = lm };
226341
227342 lm -> rev .diffopt .output_format = DIFF_FORMAT_CALLBACK ;
228343 lm -> rev .diffopt .format_callback = last_modified_diff ;
229344 lm -> rev .diffopt .format_callback_data = & data ;
/*
 * NOTE(review): presumably set so prepare_revision_walk() leaves the given
 * commits in lm->rev.commits untouched, since the loop below walks history
 * through its own queues -- confirm.
 */
345+ lm -> rev .no_walk = 1 ;
230346
231347 prepare_revision_walk (& lm -> rev );
232348
233- while (hashmap_get_size (& lm -> paths )) {
234- data .commit = get_revision (& lm -> rev );
235- if (!data .commit )
236- BUG ("paths remaining beyond boundary in last-modified" );
349+ max_count = lm -> rev .max_count ;
350+
351+ init_active_paths_for_commit (& lm -> active_paths );
/*
 * NOTE(review): bitmap_word_alloc() is given the number of paths here,
 * while active_paths_for() passes all_paths_nr / BITS_IN_EWORD + 1 --
 * confirm which unit is intended before changing either call.
 */
352+ lm -> scratch = bitmap_word_alloc (lm -> all_paths_nr );
353+
354+ /*
355+ * lm->rev.commits holds the set of boundary commits for our walk.
356+ *
357+ * Loop through each such commit, and place it in the appropriate queue.
358+ */
359+ for (list = lm -> rev .commits ; list ; list = list -> next ) {
360+ struct commit * c = list -> item ;
361+
362+ if (c -> object .flags & BOTTOM ) {
363+ prio_queue_put (& not_queue , c );
364+ c -> object .flags |= PARENT2 ;
365+ } else if (!(c -> object .flags & PARENT1 )) {
366+ /*
367+ * If the commit is a starting point (and hasn't been
368+ * seen yet), then initialize the set of interesting
369+ * paths, too.
370+ */
371+ struct bitmap * active ;
372+
373+ prio_queue_put (& queue , c );
374+ c -> object .flags |= PARENT1 ;
375+
376+ active = active_paths_for (lm , c );
377+ for (size_t i = 0 ; i < lm -> all_paths_nr ; i ++ )
378+ bitmap_set (active , i );
379+ }
380+ }
237381
238- if (data .commit -> object .flags & BOUNDARY ) {
382+ while (queue .nr ) {
383+ int parent_i ;
384+ struct commit_list * p ;
385+ struct commit * c = prio_queue_get (& queue );
386+ struct bitmap * active_c = active_paths_for (lm , c );
387+
388+ if ((0 <= max_count && max_count < ++ queue_popped ) ||
389+ (c -> object .flags & PARENT2 )) {
390+ /*
391+ * Either a boundary commit, or we have already seen too
392+ * many others. Either way, stop here.
393+ */
394+ c -> object .flags |= PARENT2 | BOUNDARY ;
395+ data .commit = c ;
239396 diff_tree_oid (lm -> rev .repo -> hash_algo -> empty_tree ,
240- & data . commit -> object .oid , "" ,
241- & lm -> rev .diffopt );
397+ & c -> object .oid ,
398+ "" , & lm -> rev .diffopt );
242399 diff_flush (& lm -> rev .diffopt );
400+ goto cleanup ;
401+ }
243402
244- break ;
403+ /*
404+ * Otherwise, make sure that 'c' isn't reachable from anything
405+ * in the '--not' queue.
406+ */
407+ repo_parse_commit (lm -> rev .repo , c );
408+
409+ while (not_queue .nr ) {
410+ struct commit_list * np ;
411+ struct commit * n = prio_queue_get (& not_queue );
412+
413+ repo_parse_commit (lm -> rev .repo , n );
414+
415+ for (np = n -> parents ; np ; np = np -> next ) {
416+ if (!(np -> item -> object .flags & PARENT2 )) {
417+ prio_queue_put (& not_queue , np -> item );
418+ np -> item -> object .flags |= PARENT2 ;
419+ }
420+ }
421+
/* Stop draining the '--not' queue once a popped commit has a lower generation than 'c'. */
422+ if (commit_graph_generation (n ) < commit_graph_generation (c ))
423+ break ;
245424 }
246425
247- if (!maybe_changed_path (lm , data .commit ))
248- continue ;
426+ /*
427+ * Look at each parent and pass on each path that's TREESAME
428+ * with that parent. Stop early when no active paths remain.
429+ */
430+ for (p = c -> parents , parent_i = 0 ; p ; p = p -> next , parent_i ++ ) {
431+ process_parent (lm , & queue ,
432+ c , active_c ,
433+ p -> item , parent_i );
434+
435+ if (bitmap_is_empty (active_c ))
436+ break ;
437+ }
438+
439+ /*
440+ * Paths that remain active, or not TREESAME with any parent,
441+ * were changed by 'c'.
442+ */
443+ if (!bitmap_is_empty (active_c )) {
444+ data .commit = c ;
445+ for (size_t i = 0 ; i < lm -> all_paths_nr ; i ++ ) {
446+ if (bitmap_get (active_c , i ))
447+ mark_path (lm -> all_paths [i ], NULL , & data );
448+ }
449+ }
249450
250- log_tree_commit (& lm -> rev , data .commit );
/* Reached for every popped commit, including via the goto above: its bitmap is no longer needed. */
451+ cleanup :
452+ active_paths_free (lm , c );
251453 }
252454
455+ if (hashmap_get_size (& lm -> paths ))
456+ BUG ("paths remaining beyond boundary in last-modified" );
457+
458+ clear_prio_queue (& not_queue );
459+ clear_prio_queue (& queue );
460+ clear_active_paths_for_commit (& lm -> active_paths );
461+ bitmap_free (lm -> scratch );
462+
253463 return 0 ;
254464}
255465
256466static int last_modified_init (struct last_modified * lm , struct repository * r ,
257467 const char * prefix , int argc , const char * * argv )
258468{
469+ struct hashmap_iter iter ;
470+ struct last_modified_entry * ent ;
471+
259472 hashmap_init (& lm -> paths , last_modified_entry_hashcmp , NULL , 0 );
260473
261474 repo_init_revisions (r , & lm -> rev , prefix );
@@ -280,6 +493,13 @@ static int last_modified_init(struct last_modified *lm, struct repository *r,
280493 if (populate_paths_from_revs (lm ) < 0 )
281494 return error (_ ("unable to setup last-modified" ));
282495
496+ CALLOC_ARRAY (lm -> all_paths , hashmap_get_size (& lm -> paths ));
497+ lm -> all_paths_nr = 0 ;
498+ hashmap_for_each_entry (& lm -> paths , & iter , ent , hashent ) {
499+ ent -> diff_idx = lm -> all_paths_nr ++ ;
500+ lm -> all_paths [ent -> diff_idx ] = ent -> path ;
501+ }
502+
283503 return 0 ;
284504}
285505
0 commit comments