Skip to content

Commit 319c89b

Browse files
committed
fix #262 - add to SearchHandle (actually MatchDocumentSummary) support for search:extracted and search:extracted-none elements
1 parent 566477e commit 319c89b

File tree

5 files changed

+480
-4
lines changed

5 files changed

+480
-4
lines changed

src/main/java/com/marklogic/client/impl/HandleAccessor.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
*/
1616
package com.marklogic.client.impl;
1717

18+
import java.io.ByteArrayInputStream;
1819
import java.io.ByteArrayOutputStream;
1920
import java.io.File;
2021
import java.io.FileInputStream;
2122
import java.io.InputStream;
2223
import java.io.InputStreamReader;
2324
import java.io.Reader;
25+
import java.io.StringReader;
2426
import java.io.UnsupportedEncodingException;
2527

2628
import com.marklogic.client.MarkLogicIOException;
@@ -52,9 +54,35 @@ static public <R extends AbstractReadHandle> Class<R> receiveAs(R handle) {
5254
return ((HandleImplementation) handle).receiveAs();
5355
}
5456
static public <R extends AbstractReadHandle> void receiveContent(R handle, Object content) {
55-
if (handle == null)
56-
return;
57-
((HandleImplementation) handle).receiveContent(content);
57+
if (handle == null) return;
58+
HandleImplementation handleImpl = (HandleImplementation) handle;
59+
if ( content == null ) {
60+
handleImpl.receiveContent(content);
61+
} else if ( handleImpl.receiveAs() != null &&
62+
handleImpl.receiveAs().isAssignableFrom(content.getClass()) )
63+
{
64+
handleImpl.receiveContent(content);
65+
} else if ( content instanceof String ) {
66+
if ( InputStream.class.isAssignableFrom(handleImpl.receiveAs()) ) {
67+
handleImpl.receiveContent( new ByteArrayInputStream(getBytes((String) content)) );
68+
} else if ( Reader.class.isAssignableFrom(handleImpl.receiveAs()) ) {
69+
handleImpl.receiveContent( new StringReader((String) content) );
70+
} else if ( byte[].class.isAssignableFrom(handleImpl.receiveAs()) ) {
71+
handleImpl.receiveContent( getBytes((String) content) );
72+
} else if ( String.class.isAssignableFrom(handleImpl.receiveAs()) ) {
73+
handleImpl.receiveContent( content );
74+
}
75+
} else {
76+
handleImpl.receiveContent(content);
77+
}
78+
}
79+
static private byte[] getBytes(String content) {
80+
if ( content == null ) return null;
81+
try {
82+
return content.getBytes("UTF-8");
83+
} catch (UnsupportedEncodingException e) {
84+
return content.getBytes();
85+
}
5886
}
5987
static public <W extends AbstractWriteHandle> Object sendContent(W handle) {
6088
if (handle == null)

src/main/java/com/marklogic/client/io/SearchHandle.java

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
package com.marklogic.client.io;
1717

18+
import java.io.IOException;
1819
import java.io.InputStream;
1920
import java.util.ArrayList;
2021
import java.util.Calendar;
@@ -41,13 +42,21 @@
4142
import org.slf4j.LoggerFactory;
4243
import org.w3c.dom.Document;
4344

45+
import com.fasterxml.jackson.core.JsonProcessingException;
46+
import com.fasterxml.jackson.databind.JsonNode;
47+
import com.fasterxml.jackson.databind.ObjectMapper;
48+
import com.marklogic.client.DatabaseClientFactory;
4449
import com.marklogic.client.DatabaseClientFactory.HandleFactoryRegistry;
4550
import com.marklogic.client.MarkLogicIOException;
51+
import com.marklogic.client.impl.HandleAccessor;
4652
import com.marklogic.client.impl.Utilities;
4753
import com.marklogic.client.io.marker.ContentHandle;
4854
import com.marklogic.client.io.marker.OperationNotSupported;
4955
import com.marklogic.client.io.marker.SearchReadHandle;
56+
import com.marklogic.client.io.marker.StructureReadHandle;
5057
import com.marklogic.client.io.marker.XMLReadHandle;
58+
import com.marklogic.client.query.ExtractedItem;
59+
import com.marklogic.client.query.ExtractedResult;
5160
import com.marklogic.client.query.FacetHeatmapValue;
5261
import com.marklogic.client.query.FacetResult;
5362
import com.marklogic.client.query.FacetValue;
@@ -510,6 +519,7 @@ private class MatchDocumentSummaryImpl implements MatchDocumentSummary {
510519
private Format format = null;
511520

512521
private ArrayList<EventRange> snippetEvents;
522+
private EventRange extractedEvents;
513523
private EventRange metadataEvents;
514524
private EventRange relevanceEvents;
515525
private ArrayList<String> similarUris;
@@ -549,6 +559,89 @@ public String getPath() {
549559
return path;
550560
}
551561

562+
@Override
563+
public ExtractedResult getExtracted() {
564+
ExtractedResultImpl result = new ExtractedResultImpl();
565+
populateExtractedResult( result, events, extractedEvents );
566+
return result;
567+
}
568+
569+
private void populateExtractedResult(ExtractedResultImpl result, List<XMLEvent> events,
570+
EventRange extractedEvents)
571+
{
572+
int start = extractedEvents.first;
573+
int end = extractedEvents.next;
574+
StartElement element = events.get(start).asStartElement();
575+
QName elementName = element.getName();
576+
if ( "extracted-none".equals(elementName.getLocalPart()) ) {
577+
result.isEmpty = true;
578+
}
579+
@SuppressWarnings("unchecked")
580+
Iterator<Attribute> attributes = element.getAttributes();
581+
while ( attributes.hasNext() ) {
582+
Attribute attr = attributes.next();
583+
String attrName = attr.getName().getLocalPart();
584+
if ( "context".equals(attrName) ) {
585+
result.context = attr.getValue();
586+
} else if ( "format".equals(attrName) ) {
587+
result.format = Format.valueOf(attr.getValue().toUpperCase());
588+
} else if ( "kind".equals(attrName) ) {
589+
result.kind = attr.getValue();
590+
}
591+
}
592+
int startChildren = start + 1;
593+
int endChildren = end - 1;
594+
// now get the children (extracted items) as strings
595+
EventRange extractedItemEvents = new EventRange(startChildren, endChildren);
596+
if ( Format.XML == getFormat() ) {
597+
result.setItems( populateExtractedItems(getSlice(events, extractedItemEvents)) );
598+
// if result.context is populated, this is not a root document node
599+
} else if ( Format.JSON == getFormat() && ! result.isEmpty && result.context != null ) {
600+
String json = events.get(startChildren).toString();
601+
try {
602+
JsonNode jsonArray = new ObjectMapper().readTree(json);
603+
ArrayList<String> items = new ArrayList<String>(jsonArray.size());
604+
for ( JsonNode item : jsonArray ) {
605+
items.add( item.toString() );
606+
}
607+
result.setItems( items );
608+
} catch (Throwable e) {
609+
throw new MarkLogicIOException("Cannot parse JSON '" + json + "' for " +
610+
result.context, e);
611+
}
612+
} else {
613+
ArrayList<String> items = new ArrayList<String>(1);
614+
items.add( events.get(startChildren).toString() );
615+
result.setItems( items );
616+
}
617+
}
618+
619+
private List<String> populateExtractedItems(List<XMLEvent> events) {
620+
List<String> items = new ArrayList<String>();
621+
List<XMLEvent> itemEvents = new ArrayList<XMLEvent>();
622+
QName startName = null;
623+
for ( XMLEvent event : events ) {
624+
itemEvents.add(event);
625+
switch (event.getEventType()) {
626+
case XMLStreamConstants.START_ELEMENT: {
627+
if (startName == null ) {
628+
startName = event.asStartElement().getName();
629+
}
630+
break;
631+
}
632+
case XMLStreamConstants.END_ELEMENT: {
633+
if (startName.equals(event.asEndElement().getName())) {
634+
startName = null;
635+
items.add(Utilities.eventsToString(itemEvents));
636+
itemEvents = new ArrayList<XMLEvent>();
637+
}
638+
break;
639+
}
640+
}
641+
}
642+
return items;
643+
}
644+
552645
@Override
553646
public <T> T getFirstSnippetAs(Class<T> as) {
554647
ContentHandle<T> handle = getHandleRegistry().makeHandle(as);
@@ -1092,6 +1185,8 @@ private void handleResult(XMLEventReader reader, StartElement element)
10921185
private void collectResult(XMLEventReader reader, StartElement element)
10931186
throws XMLStreamException {
10941187
QName snippetName = new QName(SEARCH_NS, "snippet");
1188+
QName extractedName = new QName(SEARCH_NS, "extracted");
1189+
QName extractedNoneName = new QName(SEARCH_NS, "extracted-none");
10951190
QName metadataName = new QName(SEARCH_NS, "metadata");
10961191
QName similarName = new QName(SEARCH_NS, "similar");
10971192
QName relevanceInfoName = new QName(QUERY_NS, "relevance-info");
@@ -1109,6 +1204,10 @@ private void collectResult(XMLEventReader reader, StartElement element)
11091204
QName startName = startElement.getName();
11101205
if (snippetName.equals(startName)) {
11111206
handleSnippet(reader, startElement);
1207+
} else if (extractedName.equals(startName)) {
1208+
handleExtracted(reader, startElement);
1209+
} else if (extractedNoneName.equals(startName)) {
1210+
handleExtracted(reader, startElement);
11121211
} else if (metadataName.equals(startName)) {
11131212
handleMetadata(reader, startElement);
11141213
} else if (similarName.equals(startName)) {
@@ -1145,6 +1244,10 @@ private void collectResult(XMLEventReader reader, StartElement element)
11451244
addSnippet(new EventRange(first, tempEvents.size()));
11461245
}
11471246
}
1247+
private void handleExtracted(XMLEventReader reader, StartElement element)
1248+
throws XMLStreamException {
1249+
currSummary.extractedEvents = consumeEvents(reader, element);
1250+
}
11481251
private void handleMetadata(XMLEventReader reader, StartElement element)
11491252
throws XMLStreamException {
11501253
// TODO: populate map with element name/content key/value pairs
@@ -1486,4 +1589,86 @@ private EventRange consumeEvents(XMLEventReader reader, StartElement element)
14861589
return new EventRange(first, tempEvents.size());
14871590
}
14881591
}
1592+
1593+
static private class ExtractedItemImpl implements ExtractedItem {
1594+
String item;
1595+
1596+
public ExtractedItemImpl(String item) {
1597+
this.item = item;
1598+
}
1599+
1600+
public <T extends StructureReadHandle> T get(T handle) {
1601+
HandleAccessor.receiveContent(handle, item);
1602+
return handle;
1603+
}
1604+
public <T> T getAs(Class<T> as) {
1605+
ContentHandle<T> readHandle = DatabaseClientFactory.getHandleRegistry().makeHandle(as);
1606+
if ( readHandle == null ) return null;
1607+
HandleAccessor.receiveContent(readHandle, item);
1608+
return readHandle.get();
1609+
}
1610+
}
1611+
1612+
static private class ExtractedResultImpl implements ExtractedResult {
1613+
boolean isEmpty = false;
1614+
Format format;
1615+
String context;
1616+
String kind;
1617+
private List<String> itemStrings;
1618+
private List<ExtractedItem> items;
1619+
private Iterator<ExtractedItem> internalIterator;
1620+
1621+
public boolean isEmpty() {
1622+
return isEmpty;
1623+
}
1624+
public Format getFormat() {
1625+
return format;
1626+
}
1627+
public String getContext() {
1628+
return context;
1629+
}
1630+
public String getKind() {
1631+
return kind;
1632+
}
1633+
public int size() {
1634+
if ( items == null ) return 0;
1635+
return items.size();
1636+
}
1637+
1638+
public Iterator<ExtractedItem> iterator() {
1639+
return items.iterator();
1640+
}
1641+
1642+
private void setItems(List<String> itemStrings) {
1643+
if ( itemStrings == null ) return;
1644+
this.itemStrings = itemStrings;
1645+
items = new ArrayList<ExtractedItem>(itemStrings.size());
1646+
for ( String itemString : itemStrings ) {
1647+
items.add( new ExtractedItemImpl(itemString) );
1648+
}
1649+
internalIterator = items.iterator();
1650+
}
1651+
1652+
public boolean hasNext() {
1653+
return internalIterator.hasNext();
1654+
}
1655+
1656+
public ExtractedItem next() {
1657+
return internalIterator.next();
1658+
}
1659+
1660+
public String toString() {
1661+
StringBuffer sb = new StringBuffer();
1662+
sb.append("ExtractedResult: ");
1663+
sb.append(isEmpty == true ? "isEmpty:[true] " : "");
1664+
sb.append(format != null ? "format:[" + format.toString() + "] " : "");
1665+
sb.append(context != null ? "context:[" + context + "] " : "");
1666+
sb.append(kind != null ? "kind:[" + kind + "] " : "");
1667+
for ( int i=1; i <= itemStrings.size(); i++ ) {
1668+
String item = itemStrings.get(i - 1);
1669+
sb.append("item_" + i + ":[" + item + "] ");
1670+
}
1671+
return sb.toString();
1672+
}
1673+
};
14891674
}

src/main/java/com/marklogic/client/query/MatchDocumentSummary.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ public interface MatchDocumentSummary {
4444
*/
4545
public double getConfidence();
4646

47+
public ExtractedResult getExtracted();
4748
/**
4849
* Returns the fitness of the document.
4950
* @return The fitness.

0 commit comments

Comments
 (0)