4343import java .util .stream .Stream ;
4444
4545import org .apache .commons .lang3 .SystemUtils ;
46+ import org .apache .lucene .document .DateTools ;
4647import org .apache .lucene .document .Document ;
48+ import org .apache .lucene .index .IndexableField ;
4749import org .apache .lucene .queryparser .classic .ParseException ;
4850import org .apache .lucene .search .ScoreDoc ;
4951
5052import org .eclipse .jgit .api .Git ;
5153import org .eclipse .jgit .api .MergeCommand ;
54+ import org .eclipse .jgit .api .errors .GitAPIException ;
5255import org .eclipse .jgit .lib .ObjectId ;
56+ import org .eclipse .jgit .revwalk .RevCommit ;
5357import org .junit .jupiter .api .AfterEach ;
5458import org .junit .jupiter .api .BeforeEach ;
5559import org .junit .jupiter .api .Test ;
@@ -171,6 +175,7 @@ void setUpClass() throws Exception {
171175
172176 @ AfterEach
173177 void tearDownClass () throws Exception {
178+ env .releaseIndexSearchers ();
174179 repository .destroy ();
175180 }
176181
@@ -312,21 +317,28 @@ void testIndexPath() throws IOException {
312317
313318 @ Test
314319 void testGetLastRev () throws IOException , ParseException {
320+ // IndexDatabase.getDocument() searches the index, so refresh the IndexSearcher objects
321+ // to get fresh results.
322+ env .maybeRefreshIndexSearchers ();
315323 Document doc = IndexDatabase .getDocument (Paths .get (repository .getSourceRoot (),
316324 "git" , "main.c" ).toFile ());
317325 assertNotNull (doc );
318326 assertEquals ("aa35c25882b9a60a97758e0ceb276a3f8cb4ae3a" , doc .get (QueryBuilder .LASTREV ));
319327 }
320328
321- static void changeFileAndCommit (Git git , File file , String comment ) throws Exception {
329+ static RevCommit changeFileAndCommit (Git git , File file , String comment ) throws Exception {
322330 String authorName = "Foo Bar" ;
323331 String authorEmail = "foobar@example.com" ;
324332
325333 try (FileOutputStream fos = new FileOutputStream (file , true )) {
326334 fos .write (comment .getBytes (StandardCharsets .UTF_8 ));
327335 }
328336
329- git .commit ().setMessage (comment ).setAuthor (authorName , authorEmail ).setAll (true ).call ();
337+ return commitFile (git , comment , authorName , authorEmail );
338+ }
339+
340+ private static RevCommit commitFile (Git git , String comment , String authorName , String authorEmail ) throws GitAPIException {
341+ return git .commit ().setMessage (comment ).setAuthor (authorName , authorEmail ).setAll (true ).call ();
330342 }
331343
332344 private void addFileAndCommit (Git git , String newFileName , File repositoryRoot , String message ) throws Exception {
@@ -338,7 +350,7 @@ private void addFileAndCommit(Git git, String newFileName, File repositoryRoot,
338350 fos .write ("foo bar foo bar foo bar" .getBytes (StandardCharsets .UTF_8 ));
339351 }
340352 git .add ().addFilepattern (newFileName ).call ();
341- git . commit (). setMessage ( message ). setAuthor ( "foo bar" , "foobar@example.com" ). setAll ( true ). call ( );
353+ commitFile ( git , message , "foo bar" , "foobar@example.com" );
342354 }
343355
344356 private void addMergeCommit (Git git , File repositoryRoot ) throws Exception {
@@ -1019,6 +1031,7 @@ void testAnnotationCacheProjectTunable(boolean useAnnotationCache, boolean isHis
10191031 // cleanup
10201032 gitProject .setHistoryBasedReindex (projectUseAnnotationOrig );
10211033 env .setDataRoot (dataRootOrig );
1034+ env .releaseIndexSearchers ();
10221035 IOUtils .removeRecursive (dataRoot );
10231036 }
10241037
@@ -1076,4 +1089,134 @@ void testHistoryCacheForFileBasedRepository() throws Exception {
10761089 assertFalse (otherFile .exists ());
10771090 assertFalse (historyGuru .hasHistoryCacheForFile (otherFile ));
10781091 }
1092+
1093+ /**
1094+ * When incrementally indexing across Git changesets which modify the same file however the outcome
1095+ * is no change to the file (the changes nullify each other), IndexDatabase needs to filter these files
1096+ * out because Git does it as well. Otherwise, the indexer would attempt to add the document with
1097+ * time stamp of pre-existing document which would make indexing of the related project fail.
1098+ * This test simulates this case.
1099+ * <p>
1100+ * The strategy of this test is as follows:
1101+ * <ol>
1102+ * <li>initialize parent repository</li>
1103+ * <li>change+add file <code>foo.txt</code> in parent repository, commit</li>
1104+ * <li>change+add file <code>bar.txt</code> in parent repository, commit</li>
1105+ * <li>clone parent repository</li>
1106+ * <li>index the clone</li>
1107+ * <li>change <code>foo.txt</code> in parent repository, commit</li>
1108+ * <li>change <code>bar.txt</code> in parent repository, commit</li>
1109+ * <li>revert the change done to foo.txt in the last commit in parent repository</li>
1110+ * <li>pull the changes to the clone</li>
1111+ * <li>index the clone (incremental)</li>
1112+ * </ol>
1113+ * </p>
1114+ * Before the fix, the last reindex resulted in RuntimeException caused by the addition of the <code>foo.txt</code>
1115+ * file with the time stamp of the file before the last changes. This is because history based reindex
1116+ * extracts the list of files from the changesets, however Git does not update the file if the changes
1117+ * were nullified.
1118+ */
1119+ @ Test
1120+ void testNullifiedChanges () throws Exception {
1121+ File parentRepositoryRoot = new File (env .getSourceRootPath (), "gitNoChangeParent" );
1122+ assertTrue (parentRepositoryRoot .mkdir ());
1123+
1124+ env .setHistoryBasedReindex (true );
1125+
1126+ final String barName = "bar.txt" ;
1127+ final String repoName = "gitNoChange" ;
1128+ Path repositoryRootPath = Path .of (env .getSourceRootPath (), repoName );
1129+ List <String > projectList = List .of (File .separator + repoName );
1130+ try (Git gitParent = Git .init ().setDirectory (parentRepositoryRoot ).call ()) {
1131+ // Create initial commits for the files in the parent repository.
1132+ final String fooName = "foo.txt" ;
1133+ File fooFile = new File (parentRepositoryRoot , fooName );
1134+ if (!fooFile .createNewFile ()) {
1135+ throw new IOException ("Could not create file " + fooFile );
1136+ }
1137+ gitParent .add ().addFilepattern (fooName ).call ();
1138+ changeFileAndCommit (gitParent , fooFile , "first foo" );
1139+
1140+ File barFile = new File (parentRepositoryRoot , barName );
1141+ if (!barFile .createNewFile ()) {
1142+ throw new IOException ("Could not create file " + barFile );
1143+ }
1144+ gitParent .add ().addFilepattern (barName ).call ();
1145+ changeFileAndCommit (gitParent , barFile , "first bar" );
1146+
1147+ // Clone the repository at this point so that subsequent changes can be pulled later on.
1148+ final String cloneUrl = parentRepositoryRoot .toURI ().toString ();
1149+ try (Git gitClone = Git .cloneRepository ()
1150+ .setURI (cloneUrl )
1151+ .setDirectory (repositoryRootPath .toFile ())
1152+ .call ()) {
1153+
1154+ // Perform initial index. This is important so that history cache for the repository
1155+ // is created. It contains ID of the last indexed changeset which so that it can be
1156+ // used during the final reindex.
1157+ indexer .prepareIndexer (
1158+ env , true , true ,
1159+ null , null );
1160+ env .setRepositories (new ArrayList <>(HistoryGuru .getInstance ().getRepositories ()));
1161+ env .generateProjectRepositoriesMap ();
1162+ Project project = Project .getByName (repoName );
1163+ assertNotNull (project );
1164+ List <RepositoryInfo > repositoryInfos = env .getProjectRepositoriesMap ().get (project );
1165+ assertEquals (1 , repositoryInfos .size ());
1166+ assertEquals ("git" , repositoryInfos .get (0 ).getType ());
1167+ indexer .doIndexerExecution (projectList , null );
1168+
1169+ // Change the parent repository so that it contains nullified change to the foo.txt file.
1170+ final String data = "change foo" ;
1171+ gitParent .add ().addFilepattern (fooName ).call ();
1172+ RevCommit commit = changeFileAndCommit (gitParent , fooFile , data );
1173+
1174+ // Also throw another file into the mix so that it resembles reality a bit more.
1175+ changeFileAndCommit (gitParent , barFile , "change bar" );
1176+
1177+ // Revert the changes done to foo.txt so that the changes got nullified for the subsequent pull.
1178+ gitParent .revert ().include (commit ).call ();
1179+
1180+ // Bring the changes to the repository to be indexed. Again, done for better simulation.
1181+ gitClone .pull ().call ();
1182+ }
1183+ }
1184+
1185+ // Final reindex. This should discover the changes done to the clone and index them.
1186+ indexer .prepareIndexer (
1187+ env , true , true ,
1188+ null , null );
1189+ //
1190+ // Use IndexDatabase instead of indexer.doIndexerExecution(projectList, null) because
1191+ // it will detect the indexing failure via RuntimeException. Also, it will be possible
1192+ // to determine via mocking whether history based reindex was used.
1193+ //
1194+ IndexDownArgsFactory factory = new IndexDownArgsFactory ();
1195+ IndexDownArgsFactory spyFactory = spy (factory );
1196+ IndexDownArgs args = new IndexDownArgs ();
1197+ // In this case the getIndexDownArgs() should be called from update() just once so this will suffice.
1198+ when (spyFactory .getIndexDownArgs ()).thenReturn (args );
1199+ Project project = env .getProjects ().get (repoName );
1200+ assertNotNull (project );
1201+ IndexDatabase idbOrig = new IndexDatabase (project , spyFactory );
1202+ assertNotNull (idbOrig );
1203+ IndexDatabase idb = spy (idbOrig );
1204+ idb .update ();
1205+ // Verify history based reindex was used.
1206+ checkIndexDown (true , idb );
1207+
1208+ // Check that the document for bar.txt was updated. Serves as a smoke test.
1209+ File barFile = new File (repositoryRootPath .toString (), barName );
1210+ assertTrue (barFile .exists ());
1211+ // IndexDatabase.getDocument() performs index search to retrieve the document, so the corresponding
1212+ // IndexSearcher object has to be bumped in order to get fresh document.
1213+ env .maybeRefreshIndexSearchers ();
1214+ Document barDoc = IndexDatabase .getDocument (barFile );
1215+ assertNotNull (barDoc );
1216+ IndexableField field = barDoc .getField (QueryBuilder .DATE );
1217+ String docDate = field .stringValue ();
1218+ // Need to use the same resolution as in AnalyzerGuru#populateDocument().
1219+ String fileDate = DateTools .timeToString (barFile .lastModified (), DateTools .Resolution .MILLISECOND );
1220+ assertEquals (fileDate , docDate );
1221+ }
10791222}
0 commit comments