Skip to content

Commit e36020d

Browse files
llingllinggit
authored and committed
changes for DHFPROD-3107.
1 parent a7615c1 commit e36020d

File tree

5 files changed

+223
-0
lines changed

5 files changed

+223
-0
lines changed
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Copyright 2019 MarkLogic Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.marklogic.client.datamovement;
18+
19+
import com.marklogic.client.io.Format;
20+
import com.marklogic.client.io.StringHandle;
21+
22+
import java.io.*;
23+
import java.nio.charset.Charset;
24+
import java.util.stream.Stream;
25+
26+
/**
27+
* java doc to do
28+
*/
29+
public class LineSplitter implements Splitter<StringHandle> {
30+
private Format format = Format.JSON;
31+
private int count = 0;
32+
33+
/**
34+
* Returns the document format set to splitter.
35+
* @return the document format set to splitter. The default is Format.JSON.
36+
*/
37+
public Format getFormat() {
38+
return this.format;
39+
}
40+
41+
/**
42+
* Used to set document format to splitter
43+
* @param format the document content format.
44+
*/
45+
public void setFormat(Format format) {
46+
if (format == null) {
47+
throw new IllegalArgumentException("some exception");
48+
}
49+
50+
this.format = format;
51+
}
52+
53+
/**
54+
*
55+
* @return the number of objects in the stream.
56+
*/
57+
@Override
58+
public long getCount() {
59+
return count;
60+
}
61+
62+
/**
63+
* Takes the input stream and converts it into a stream of StringHandle. The content could be
64+
* line-delimited JSON file, line-delimited XML file or gzip-compressed line-delimited JSON file.
65+
* @param input is the incoming input stream.
66+
* @return a stream of StringHandle.
67+
* @throws IOException
68+
*/
69+
@Override
70+
public Stream<StringHandle> split(InputStream input) throws IOException {
71+
return split(input, null);
72+
}
73+
74+
/**
75+
* Takes the input stream and converts it into a stream of StringHandle. The content could be
76+
* line-delimited JSON file, line-delimited XML file or gzip-compressed line-delimited JSON file.
77+
* @param input is the incoming input stream.
78+
* @param charset is the encoding scheme the document uses.
79+
* @return a stream of StringHandle.
80+
* @throws IOException
81+
*/
82+
public Stream<StringHandle> split(InputStream input, Charset charset) throws IOException {
83+
if (input == null) {
84+
throw new IllegalArgumentException("some exception");
85+
}
86+
87+
return split( (charset == null) ?
88+
new InputStreamReader(input) :
89+
new InputStreamReader(input, charset));
90+
}
91+
92+
/**
93+
*Takes the Reader input and converts it into a stream of StringHandle. The content could be
94+
* line-delimited JSON file, line-delimited XML file or gzip-compressed line-delimited JSON file.
95+
* @param input is the incoming Reader.
96+
* @return a stream of StringHandle.
97+
* @throws IOException
98+
*/
99+
public Stream<StringHandle> split(Reader input) throws IOException {
100+
if (input == null) {
101+
throw new IllegalArgumentException("some exception");
102+
}
103+
return new BufferedReader(input)
104+
.lines()
105+
.map(line -> {
106+
count++;
107+
return new StringHandle(line).withFormat(getFormat());
108+
});
109+
}
110+
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package com.marklogic.client.test;
2+
3+
import com.marklogic.client.DatabaseClient;
4+
import com.marklogic.client.DatabaseClientFactory;
5+
import com.marklogic.client.datamovement.LineSplitter;
6+
import com.marklogic.client.io.Format;
7+
import com.marklogic.client.io.StringHandle;
8+
import org.junit.After;
9+
import org.junit.Before;
10+
import org.junit.Test;
11+
12+
import java.io.BufferedReader;
13+
import java.io.File;
14+
import java.io.FileInputStream;
15+
import java.io.InputStreamReader;
16+
import java.nio.file.Files;
17+
import java.nio.file.Paths;
18+
import java.util.stream.Stream;
19+
import java.util.zip.GZIPInputStream;
20+
21+
import static org.junit.Assert.*;
22+
23+
public class LineSplitterTest {
24+
static final private String jsonlFile = "src/test/resources/data" + File.separator + "line-delimited.jsonl";
25+
static final private String jsonlGzipFile = "src/test/resources/data" + File.separator + "line-delimited.jsonl.gz";
26+
static final private String xmlFile = "src/test/resources/data" + File.separator + "line-delimited.txt";
27+
private DatabaseClient client;
28+
29+
@Before
30+
public void setUp() {
31+
client = DatabaseClientFactory.newClient("localhost", 8012,
32+
new DatabaseClientFactory.DigestAuthContext("rest-admin", "x"));
33+
}
34+
35+
@Test
36+
public void testSplitter() throws Exception {
37+
LineSplitter splitter = new LineSplitter();
38+
Stream<StringHandle> contentStream = splitter.split(new FileInputStream(jsonlFile));
39+
assertNotNull(contentStream);
40+
41+
StringHandle[] result = contentStream.toArray(size -> new StringHandle[size]);
42+
assertNotNull(result);
43+
assertEquals(result.length, Files.lines(Paths.get(jsonlFile)).count());
44+
45+
String[] originalResult = Files.lines(Paths.get(jsonlFile))
46+
.toArray(size -> new String[size]);
47+
48+
for (int i = 0; i < result.length && i < originalResult.length; i++) {
49+
assertNotNull(result[i].get());
50+
assertEquals(result[i].get(), originalResult[i]);
51+
}
52+
}
53+
54+
@Test
55+
public void testSplitterGzip() throws Exception {
56+
LineSplitter splitter = new LineSplitter();
57+
GZIPInputStream gzipStream = new GZIPInputStream(new FileInputStream(jsonlGzipFile));
58+
Stream<StringHandle> contentStream = splitter.split(gzipStream);
59+
assertNotNull(contentStream);
60+
61+
StringHandle[] result = contentStream.toArray(size -> new StringHandle[size]);
62+
assertNotNull(result);
63+
64+
gzipStream = new GZIPInputStream(new FileInputStream(jsonlGzipFile));
65+
String[] originalResult = new BufferedReader(new InputStreamReader(gzipStream))
66+
.lines()
67+
.toArray(size -> new String[size]);
68+
assertEquals(result.length, originalResult.length);
69+
70+
for (int i = 0; i < result.length && i < originalResult.length; i++) {
71+
assertNotNull(result[i].get());
72+
assertEquals(result[i].get(), originalResult[i]);
73+
}
74+
}
75+
76+
@Test
77+
public void testSplitterXML() throws Exception {
78+
LineSplitter splitter = new LineSplitter();
79+
splitter.setFormat(Format.XML);
80+
Stream<StringHandle> contentStream = splitter.split(new FileInputStream(xmlFile));
81+
assertNotNull(contentStream);
82+
83+
StringHandle[] result = contentStream.toArray(size -> new StringHandle[size]);
84+
assertNotNull(result);
85+
assertEquals(result.length, Files.lines(Paths.get(xmlFile)).count());
86+
87+
String[] originalResult = Files.lines(Paths.get(xmlFile))
88+
.toArray(size -> new String[size]);
89+
assertEquals(result[0].getFormat(), Format.XML);
90+
for (int i = 0; i < result.length && i < originalResult.length; i++) {
91+
assertNotNull(result[i].get());
92+
assertEquals(result[i].get(), originalResult[i]);
93+
}
94+
}
95+
96+
@After
97+
public void closeSetUp() {
98+
client.release();
99+
}
100+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
["Name", "Session", "Score", "Completed"]
2+
["Gilbert", "2013", 24, true]
3+
["Alexa", "2013", 29, true]
4+
["May", "2012B", 14, false]
5+
["Deloise", "2012A", 19, true]
6+
{"name": "Gilbert", "wins": [["straight", "7"], ["one pair", "10"]]}
7+
{"name": "Alexa", "wins": [["two pair", "4"], ["two pair", "9"]]}
8+
{"name": "May", "wins": []}
9+
{"name": "Deloise", "wins": [["three of a kind", "5"]]}
Binary file not shown.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<root1><content1>first line</content1>></root1>
2+
<root2><content2>second line</content2>></root2>
3+
<root3><content3>third line</content3>></root3>
4+
<root4><content4>fourth line</content4>></root4>

0 commit comments

Comments
 (0)