Skip to content

Commit 8df660a

Browse files
author
Vladimir Kotal
committed
detect repositories for each source root subdirectory in parallel
fixes #773
1 parent d5999cd commit 8df660a

File tree

6 files changed

+143
-19
lines changed

6 files changed

+143
-19
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ public final class Configuration {
195195
private int indexingParallelism;
196196
private int historyParallelism;
197197
private int historyRenamedParallelism;
198+
private int repositorySearchParallelism;
198199
private boolean tagsEnabled;
199200
private int hitsPerPage;
200201
private int cachePages;
@@ -238,9 +239,9 @@ public final class Configuration {
238239
* The directory hierarchy depth to limit the scanning for repositories.
239240
* E.g. if the /mercurial/ directory (relative to source root) is a repository
240241
* and /mercurial/usr/closed/ is sub-repository, the latter will be discovered
241-
* only if the depth is set to 3 or greater.
242+
* only if the depth is set to 2 or greater.
242243
*/
243-
public static final int defaultScanningDepth = 3;
244+
public static final int defaultScanningDepth = 2;
244245

245246
/**
246247
* The name of the eftar file relative to the <var>DATA_ROOT</var>, which
@@ -1145,6 +1146,14 @@ public void setHistoryRenamedParallelism(int value) {
11451146
this.historyRenamedParallelism = value > 0 ? value : 0;
11461147
}
11471148

1149+
public int getRepositorySearchParallelism() {
1150+
return repositorySearchParallelism;
1151+
}
1152+
1153+
public void setRepositorySearchParallelism(int value) {
1154+
this.repositorySearchParallelism = value > 0 ? value : 0;
1155+
}
1156+
11481157
public boolean isTagsEnabled() {
11491158
return this.tagsEnabled;
11501159
}

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,17 @@ public int getHistoryRenamedParallelism() {
11101110
parallelism;
11111111
}
11121112

1113+
/**
1114+
* Gets the value of {@link Configuration#getRepositorySearchParallelism()} -- or
1115+
* if zero, then as a default gets the number of available processors.
1116+
* @return a natural number &gt;= 1
1117+
*/
1118+
public int getRepositorySearchParallelism() {
1119+
int parallelism = syncReadConfiguration(Configuration::getRepositorySearchParallelism);
1120+
return parallelism < 1 ? Runtime.getRuntime().availableProcessors() :
1121+
parallelism;
1122+
}
1123+
11131124
public boolean isTagsEnabled() {
11141125
return syncReadConfiguration(Configuration::isTagsEnabled);
11151126
}

opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
2323
*/
2424
package org.opengrok.indexer.history;
@@ -41,6 +41,7 @@
4141
import java.util.concurrent.CountDownLatch;
4242
import java.util.concurrent.ExecutorService;
4343
import java.util.concurrent.Executors;
44+
import java.util.concurrent.Future;
4445
import java.util.concurrent.ThreadFactory;
4546
import java.util.logging.Level;
4647
import java.util.logging.Logger;
@@ -88,15 +89,12 @@ public final class HistoryGuru {
8889
*/
8990
private final Map<String, String> repositoryRoots = new ConcurrentHashMap<>();
9091

91-
private final int scanningDepth;
92-
9392
/**
9493
* Creates a new instance of HistoryGuru, and tries to set the default source
9594
* control system.
9695
*/
9796
private HistoryGuru() {
9897
env = RuntimeEnvironment.getInstance();
99-
scanningDepth = env.getScanningDepth();
10098

10199
HistoryCache cache = null;
102100
if (env.useHistoryCache()) {
@@ -435,7 +433,7 @@ private Collection<RepositoryInfo> addRepositories(File[] files,
435433
"Failed to get sub directories for ''{0}'', " +
436434
"check access permissions.",
437435
file.getAbsolutePath());
438-
} else if (depth <= scanningDepth) {
436+
} else if (depth <= env.getScanningDepth()) {
439437
repoList.addAll(addRepositories(subFiles,
440438
allowedNesting, depth + 1, isNested));
441439
}
@@ -453,7 +451,7 @@ private Collection<RepositoryInfo> addRepositories(File[] files,
453451
LOGGER.log(Level.WARNING,
454452
"Failed to get sub directories for ''{0}'', check access permissions.",
455453
file.getAbsolutePath());
456-
} else if (depth <= scanningDepth) {
454+
} else if (depth <= env.getScanningDepth()) {
457455
// Search down to a limit -- otherwise there is too much
458456
// stat'ing for huge Mercurial repositories
459457
repoList.addAll(addRepositories(subFiles,
@@ -480,7 +478,25 @@ private Collection<RepositoryInfo> addRepositories(File[] files,
480478
* @return collection of added repositories
481479
*/
482480
public Collection<RepositoryInfo> addRepositories(File[] files) {
483-
return addRepositories(files, env.getNestingMaximum(), 0, false);
481+
ExecutorService executor = env.getIndexerParallelizer().getRepositorySearchExecutor();
482+
List<Future<Collection<RepositoryInfo>>> futures = new ArrayList<>();
483+
for (File file: files) {
484+
futures.add(executor.submit(() -> addRepositories(new File[]{file},
485+
env.getNestingMaximum(), 0, false)));
486+
}
487+
488+
List<RepositoryInfo> repoList = new ArrayList<>();
489+
futures.forEach(future -> {
490+
try {
491+
repoList.addAll(future.get());
492+
} catch (Exception e) {
493+
LOGGER.log(Level.WARNING, "failed to get results of repository scan");
494+
}
495+
});
496+
497+
env.getIndexerParallelizer().bounceRepositorySearchExecutor();
498+
499+
return repoList;
484500
}
485501

486502
/**

opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -336,12 +336,16 @@ public static void main(String[] argv) {
336336
// Create history cache first.
337337
if (searchRepositories) {
338338
if (searchPaths.isEmpty()) {
339-
searchPaths.add(env.getSourceRootPath());
340-
} else {
341-
searchPaths = searchPaths.stream().
342-
map(t -> Paths.get(env.getSourceRootPath(), t).toString()).
343-
collect(Collectors.toSet());
339+
String[] dirs = env.getSourceRootFile().
340+
list((f, name) -> f.isDirectory() && env.getPathAccepter().accept(f));
341+
if (dirs != null) {
342+
searchPaths.addAll(Arrays.asList(dirs));
343+
}
344344
}
345+
346+
searchPaths = searchPaths.stream().
347+
map(t -> Paths.get(env.getSourceRootPath(), t).toString()).
348+
collect(Collectors.toSet());
345349
}
346350
getInstance().prepareIndexer(env, searchPaths, addProjects,
347351
createDict, runIndex, subFiles, new ArrayList<>(repositories));
@@ -609,7 +613,7 @@ public static String[] parseOptions(String[] argv) throws ParseException {
609613
runIndex = false);
610614

611615
parser.on("--nestingMaximum", "=number",
612-
"Maximum of nested repositories. Default is 1.").execute(v ->
616+
"Maximum depth of nested repositories. Default is 1.").execute(v ->
613617
cfg.setNestingMaximum((Integer) v));
614618

615619
parser.on("-O", "--optimize", "=on|off", ON_OFF, Boolean.class,
@@ -739,10 +743,13 @@ public static String[] parseOptions(String[] argv) throws ParseException {
739743
cfg.setWebappLAF((String) stylePath));
740744

741745
parser.on("-T", "--threads", "=number", Integer.class,
742-
"The number of threads to use for index generation. By default the number",
743-
"of threads will be set to the number of available CPUs. This influences the number",
744-
"of spawned ctags processes as well.").execute(threadCount ->
745-
cfg.setIndexingParallelism((Integer) threadCount));
746+
"The number of threads to use for index generation and repository scan.",
747+
"By default the number of threads will be set to the number of available",
748+
"CPUs. This influences the number of spawned ctags processes as well.").
749+
execute(threadCount -> {
750+
cfg.setIndexingParallelism((Integer) threadCount);
751+
cfg.setRepositorySearchParallelism((Integer) threadCount);
752+
});
746753

747754
parser.on("-t", "--tabSize", "=number", Integer.class,
748755
"Default tab size to use (number of spaces per tab character).")

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexerParallelizer.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
22+
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
2223
*/
2324

2425
package org.opengrok.indexer.index;
@@ -61,6 +62,7 @@ public class IndexerParallelizer implements AutoCloseable {
6162
private LazilyInstantiate<ExecutorService> lzHistoryExecutor;
6263
private LazilyInstantiate<ExecutorService> lzHistoryRenamedExecutor;
6364
private LazilyInstantiate<ExecutorService> lzCtagsWatcherExecutor;
65+
private LazilyInstantiate<ExecutorService> lzRepositorySearchExecutor;
6466

6567
/**
6668
* Initializes a new instance using settings from the specified environment
@@ -84,6 +86,7 @@ public IndexerParallelizer(RuntimeEnvironment env) {
8486
createLazyHistoryExecutor();
8587
createLazyHistoryRenamedExecutor();
8688
createLazyCtagsWatcherExecutor();
89+
createLazyRepositorySearchExecutor();
8790
}
8891

8992
/**
@@ -128,6 +131,13 @@ public ExecutorService getCtagsWatcherExecutor() {
128131
return lzCtagsWatcherExecutor.get();
129132
}
130133

134+
/**
135+
* @return the ExecutorService used for repository scan
136+
*/
137+
public ExecutorService getRepositorySearchExecutor() {
138+
return lzRepositorySearchExecutor.get();
139+
}
140+
131141
/**
132142
* Calls {@link #bounce()}, which prepares for -- but does not start -- new
133143
* pools.
@@ -154,43 +164,75 @@ public void close() {
154164
* call this method satisfactorily too.
155165
*/
156166
public void bounce() {
167+
bounceForkJoinPool();
168+
bounceFixedExecutor();
169+
bounceCtagsPool();
170+
bounceHistoryExecutor();
171+
bounceHistoryRenamedExecutor();
172+
bounceCtagsWatcherExecutor();
173+
bounceRepositorySearchExecutor();
174+
}
175+
176+
private void bounceForkJoinPool() {
157177
if (lzForkJoinPool.isActive()) {
158178
ForkJoinPool formerForkJoinPool = lzForkJoinPool.get();
159179
createLazyForkJoinPool();
160180
formerForkJoinPool.shutdown();
161181
}
182+
}
162183

184+
private void bounceFixedExecutor() {
163185
if (lzFixedExecutor.isActive()) {
164186
ExecutorService formerFixedExecutor = lzFixedExecutor.get();
165187
createLazyFixedExecutor();
166188
formerFixedExecutor.shutdown();
167189
}
190+
}
168191

192+
private void bounceCtagsPool() {
169193
if (lzCtagsPool.isActive()) {
170194
ObjectPool<Ctags> formerCtagsPool = lzCtagsPool.get();
171195
createLazyCtagsPool();
172196
formerCtagsPool.shutdown();
173197
}
198+
}
174199

200+
private void bounceHistoryExecutor() {
175201
if (lzHistoryExecutor.isActive()) {
176202
ExecutorService formerHistoryExecutor = lzHistoryExecutor.get();
177203
createLazyHistoryExecutor();
178204
formerHistoryExecutor.shutdown();
179205
}
206+
}
180207

208+
private void bounceHistoryRenamedExecutor() {
181209
if (lzHistoryRenamedExecutor.isActive()) {
182210
ExecutorService formerHistoryRenamedExecutor = lzHistoryRenamedExecutor.get();
183211
createLazyHistoryRenamedExecutor();
184212
formerHistoryRenamedExecutor.shutdown();
185213
}
214+
}
186215

216+
private void bounceCtagsWatcherExecutor() {
187217
if (lzCtagsWatcherExecutor.isActive()) {
188218
ExecutorService formerCtagsWatcherExecutor = lzCtagsWatcherExecutor.get();
189219
createLazyCtagsWatcherExecutor();
190220
formerCtagsWatcherExecutor.shutdown();
191221
}
192222
}
193223

224+
/**
225+
* Shuts down the active repository search executor and prepares a lazily created replacement.
226+
* @see #bounce()
227+
*/
228+
public void bounceRepositorySearchExecutor() {
229+
if (lzRepositorySearchExecutor.isActive()) {
230+
ExecutorService formerRepositorySearchExecutor = lzRepositorySearchExecutor.get();
231+
createLazyRepositorySearchExecutor();
232+
formerRepositorySearchExecutor.shutdown();
233+
}
234+
}
235+
194236
private void createLazyForkJoinPool() {
195237
lzForkJoinPool = LazilyInstantiate.using(() ->
196238
new ForkJoinPool(indexingParallelism));
@@ -226,6 +268,11 @@ private void createLazyHistoryRenamedExecutor() {
226268
Executors.newFixedThreadPool(env.getHistoryRenamedParallelism()));
227269
}
228270

271+
private void createLazyRepositorySearchExecutor() {
272+
lzRepositorySearchExecutor = LazilyInstantiate.using(() ->
273+
Executors.newFixedThreadPool(env.getRepositorySearchParallelism()));
274+
}
275+
229276
private class CtagsObjectFactory implements ObjectFactory<Ctags> {
230277

231278
public Ctags createNew() {

opengrok-indexer/src/test/java/org/opengrok/indexer/history/HistoryGuruTest.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import java.util.Collection;
3636
import java.util.Collections;
3737
import java.util.List;
38+
import java.util.stream.Collectors;
3839

3940
import org.junit.After;
4041
import org.junit.AfterClass;
@@ -252,4 +253,37 @@ private static void certainlyMkdirs(File file) throws IOException {
252253
throw new IOException("Couldn't mkdirs " + file);
253254
}
254255
}
256+
257+
@Test
258+
@ConditionalRun(RepositoryInstalled.GitInstalled.class)
259+
public void testScanningDepth() throws IOException {
260+
String topLevelDirName = "scanDepthTest";
261+
File repoRoot = new File(repository.getSourceRoot(), topLevelDirName);
262+
certainlyMkdirs(repoRoot);
263+
File repo0 = new File(repoRoot, ".git");
264+
certainlyMkdirs(repo0);
265+
File sub1 = new File(repoRoot, "sub1");
266+
certainlyMkdirs(sub1);
267+
File sub2 = new File(sub1, "sub2");
268+
certainlyMkdirs(sub2);
269+
File sub3 = new File(sub2, ".git");
270+
certainlyMkdirs(sub3);
271+
272+
int originalScanDepth = env.getScanningDepth();
273+
env.setScanningDepth(0);
274+
275+
HistoryGuru instance = HistoryGuru.getInstance();
276+
Collection<RepositoryInfo> addedRepos = instance.addRepositories(
277+
Collections.singleton(Paths.get(repository.getSourceRoot(),topLevelDirName).toString()));
278+
assertEquals("should add to max depth", 1, addedRepos.size());
279+
280+
env.setScanningDepth(1);
281+
List<String> repoDirs = addedRepos.stream().map(RepositoryInfo::getDirectoryName).collect(Collectors.toList());
282+
instance.removeRepositories(repoDirs);
283+
addedRepos = instance.addRepositories(
284+
Collections.singleton(Paths.get(repository.getSourceRoot(),topLevelDirName).toString()));
285+
assertEquals("should add to increased max depth", 2, addedRepos.size());
286+
287+
env.setScanningDepth(originalScanDepth);
288+
}
255289
}

0 commit comments

Comments
 (0)