Skip to content

Commit c8c60cd

Browse files
llingllinggit
authored and committed
Fixes after review of work DHFPROD-3136.
1 parent 144e290 commit c8c60cd

File tree

2 files changed

+19
-27
lines changed

2 files changed

+19
-27
lines changed

marklogic-client-api/src/main/java/com/marklogic/client/datamovement/ZipSplitter.java

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,24 @@ public class ZipSplitter implements Splitter<BytesHandle> {
5050
/**
5151
* Returns the extensionFormats set to splitter. The extensionFormat is pre-defined in splitter.
5252
* It includes "json", "txt" and "xml" extensions. If the file has no extension, it is treated as a binary file.
53+
* You can also add mappings from other extensions in the zipfile to one of the four MarkLogic formats.
5354
* @return a map of extensionFormats
5455
*/
5556
public Map<String, Format> getExtensionFormats() {
5657
return this.extensionFormats;
5758
}
5859

5960
/**
60-
* Returns the entryFileter set to splitter.
61-
* @return the entryFileter set to splitter
61+
* Returns the entryFilter set to splitter.
62+
* @return the entryFilter set to splitter
6263
*/
6364
public Predicate<ZipEntry> getEntryFilter() {
6465
return this.entryFilter;
6566
}
6667

6768
/**
68-
* Used to set entryFilter to splitter.
69+
* Used to set entryFilter to splitter. The entryFilter is a lambda function, which can be used to inspect
70+
* the zip entry and return false for any document in the zipfile that should be ignored.
6971
* @param entryFilter the filter that is applied to each zipEntry
7072
*/
7173
public void setEntryFilter(Predicate<ZipEntry> entryFilter) {
@@ -81,7 +83,8 @@ public Function<String, String> getUriTransformer() {
8183
}
8284

8385
/**
84-
* Used to set uriTransformer to splitter
86+
* Used to set uriTransformer to splitter. The uriTransformer is a lambda function, which can be used to
87+
* transform the name of the document in the zipfile into the document URI for the database.
8588
* @param uriTransformer the uriTransformer which is applied to each document URI
8689
*/
8790
public void setUriTransformer(Function<String, String> uriTransformer) {
@@ -106,6 +109,7 @@ public long getCount() {
106109

107110
/**
108111
* Takes an input stream of a ZIP file and converts it to a stream of BytesHandle.
112+
* The input stream must be a ZipInputStream, otherwise it will throw an exception.
109113
* The ZIP file could contain XML, JSON, TXT and BINARY files.
110114
* @param input is the incoming input stream
111115
* @return a stream of BytesHandle
@@ -140,9 +144,8 @@ public Stream<BytesHandle> split(ZipInputStream input) throws IOException {
140144
bytesHandleSpliterator.setZipStream(input);
141145
bytesHandleSpliterator.setEntryFilter(this.entryFilter);
142146
bytesHandleSpliterator.setExtensionFormats(this.extensionFormats);
143-
bytesHandleSpliterator.setUriTransformer(this.uriTransformer);
144147

145-
return StreamSupport.stream(bytesHandleSpliterator, false);
148+
return StreamSupport.stream(bytesHandleSpliterator, true);
146149
}
147150

148151
/**
@@ -162,9 +165,8 @@ public Stream<DocumentWriteOperation> splitWriteOperations(ZipInputStream input)
162165
documentWriteOperationSpliterator.setZipStream(input);
163166
documentWriteOperationSpliterator.setEntryFilter(this.entryFilter);
164167
documentWriteOperationSpliterator.setExtensionFormats(this.extensionFormats);
165-
documentWriteOperationSpliterator.setUriTransformer(this.uriTransformer);
166168

167-
return StreamSupport.stream(documentWriteOperationSpliterator, false);
169+
return StreamSupport.stream(documentWriteOperationSpliterator, true);
168170
}
169171

170172
private static class FormatEntry {
@@ -201,7 +203,6 @@ private static abstract class ZipEntrySpliterator<T> extends Spliterators.Abstra
201203
private ZipInputStream zipStream;
202204
private Map<String,Format> extensionFormats;
203205
private Predicate<ZipEntry> entryFilter;
204-
private Function<String, String> uriTransformer;
205206

206207
ZipEntrySpliterator(long est, int additionalCharacteristics) {
207208
super(est, additionalCharacteristics);
@@ -238,27 +239,15 @@ void setEntryFilter(Predicate<ZipEntry> entryFilter) {
238239
this.entryFilter = entryFilter;
239240
}
240241

241-
Function<String, String> getUriTransformer() {
242-
return this.uriTransformer;
243-
}
244-
245-
void setUriTransformer(Function<String, String> uriTransformer) {
246-
this.uriTransformer = uriTransformer;
247-
}
248-
249242
protected FormatEntry getNextEntry() throws IOException {
250243
ZipEntry candidateEntry;
251244

252245
while ((candidateEntry = getZipStream().getNextEntry()) != null) {
253-
if (getEntryFilter() != null && getEntryFilter().test(candidateEntry) == false) {
246+
if (getEntryFilter() != null && !getEntryFilter().test(candidateEntry)) {
254247
continue;
255248
}
256249

257250
String name = candidateEntry.getName();
258-
if (getUriTransformer() != null) {
259-
name = getUriTransformer().apply(name);
260-
}
261-
262251
Matcher matcher = extensionRegex.matcher(name);
263252
matcher.find();
264253
String extension = matcher.group(1);
@@ -311,7 +300,7 @@ public boolean tryAdvance(Consumer<? super BytesHandle> action) {
311300
splitter.count++;
312301

313302
} catch (IOException e) {
314-
e.printStackTrace();
303+
throw new RuntimeException("Could not read ZipEntry", e);
315304
}
316305

317306
return true;
@@ -340,8 +329,8 @@ public boolean tryAdvance(Consumer<? super DocumentWriteOperation> action) {
340329
String name = nextEntry.getZipEntry().getName();
341330

342331
String uri = name;
343-
if (getUriTransformer() != null) {
344-
uri = getUriTransformer().apply(name);
332+
if (splitter.uriTransformer != null) {
333+
uri = splitter.uriTransformer.apply(name);
345334
}
346335

347336
DocumentWriteOperationImpl documentWriteOperation = new DocumentWriteOperationImpl(
@@ -354,7 +343,7 @@ public boolean tryAdvance(Consumer<? super DocumentWriteOperation> action) {
354343
splitter.count++;
355344

356345
} catch (IOException e) {
357-
e.printStackTrace();
346+
throw new RuntimeException("Could not read ZipEntry", e);
358347
}
359348

360349
return true;

marklogic-client-api/src/test/java/com/marklogic/client/test/ZipSplitterTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ public class ZipSplitterTest {
2121
public void testSplitter() throws Exception {
2222

2323
ZipSplitter splitter = new ZipSplitter();
24+
splitter.setEntryFilter(x -> x.getSize() > 50 ? true : false );
2425
Stream<BytesHandle> contentStream = splitter.split(new ZipInputStream(new FileInputStream(zipFile)));
2526
assertNotNull(contentStream);
2627

2728
BytesHandle[] bytesResult = contentStream.toArray(size -> new BytesHandle[size]);
2829
assertNotNull(bytesResult);
30+
assertEquals(bytesResult.length, 2);
2931

3032
ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(zipFile));
3133
ZipEntry zipEntry = null;
@@ -40,6 +42,7 @@ public void testSplitter() throws Exception {
4042
public void testSplitterWrite() throws Exception {
4143

4244
ZipSplitter splitter = new ZipSplitter();
45+
splitter.setUriTransformer(name -> name.toUpperCase());
4346
Stream<DocumentWriteOperation> contentStream =
4447
splitter.splitWriteOperations(new ZipInputStream(new FileInputStream(zipFile)));
4548
assertNotNull(contentStream);
@@ -51,7 +54,7 @@ public void testSplitterWrite() throws Exception {
5154
while (itr.hasNext() && ((zipEntry = zipInputStream.getNextEntry()) != null)) {
5255
DocumentWriteOperation docOp = itr.next();
5356
assertNotNull(docOp.getUri());
54-
assertEquals(docOp.getUri(), zipEntry.getName());
57+
assertEquals(docOp.getUri(), zipEntry.getName().toUpperCase());
5558

5659
assertNotNull(docOp.getContent());
5760
String docOpContent = docOp.getContent().toString();

0 commit comments

Comments
 (0)