Skip to content

Commit b384029

Browse files
committed
RDFa parser finalized
1 parent 15bd804 commit b384029

File tree

4 files changed

+161
-62
lines changed

4 files changed

+161
-62
lines changed

src/main/java/fr/inria/corese/core/next/api/base/model/AbstractModel.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,8 @@ public boolean containsAll(Collection<?> collection) {
334334
Iterator<?> iterator = collection.iterator();
335335
try {
336336
while (iterator.hasNext()) {
337-
if (!contains(iterator.next())) {
337+
Object currentObject = iterator.next();
338+
if (! (currentObject instanceof Statement) && ! this.contains(currentObject)) {
338339
return false;
339340
}
340341
}

src/main/java/fr/inria/corese/core/next/impl/io/parser/rdfa/RDFaParser.java

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,29 @@
44
import fr.inria.corese.core.next.api.base.io.RDFFormat;
55
import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser;
66
import fr.inria.corese.core.next.api.io.IOOptions;
7+
import fr.inria.corese.core.next.api.io.common.BaseIRIOptions;
78
import fr.inria.corese.core.next.impl.common.util.IRIUtils;
89
import fr.inria.corese.core.next.impl.common.vocabulary.RDF;
910
import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
1011
import fr.inria.corese.core.next.impl.io.parser.rdfa.model.RDFaIncompleteStatement;
1112
import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants;
13+
import org.apache.commons.io.input.ReaderInputStream;
1214
import org.jsoup.Jsoup;
1315
import org.jsoup.nodes.Attribute;
1416
import org.jsoup.nodes.Document;
1517
import org.jsoup.nodes.Element;
16-
import org.slf4j.Logger;
17-
import org.slf4j.LoggerFactory;
1818

1919
import java.io.InputStream;
20+
import java.io.InputStreamReader;
2021
import java.io.Reader;
22+
import java.nio.charset.StandardCharsets;
2123
import java.util.*;
2224

25+
/**
26+
* RDFa parser. This parser loads the RDF data embedded as RDFa in an HTML page. Its implementation is based on the jsoup library: it loads the HTML page as a DOM and processes it following the <a href="https://www.w3.org/TR/rdfa-syntax/#sec_5.5.">processing algorithm recommended in the RDFa recommendation</a>.
27+
*/
2328
public class RDFaParser extends AbstractRDFParser {
2429

25-
private static final Logger logger = LoggerFactory.getLogger(RDFaParser.class);
26-
2730
private static final String REL_ATTR = "rel";
2831
private static final String REV_ATTR = "rev";
2932
private static final String CONTENT_ATTR = "content";
@@ -51,6 +54,16 @@ public RDFFormat getRDFFormat() {
5154
return RDFFormat.RDFa;
5255
}
5356

57+
@Override
58+
public void parse(InputStream in) {
59+
if(getConfig() instanceof BaseIRIOptions baseIRIOptions) {
60+
String baseIRI = baseIRIOptions.getBaseIRI();
61+
parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseIRI);
62+
} else {
63+
parse(new InputStreamReader(in, StandardCharsets.UTF_8), null);
64+
}
65+
}
66+
5467
@Override
5568
public void parse(InputStream in, String baseURIString) {
5669
try {
@@ -90,7 +103,7 @@ private void processDocument(Document document, IRI baseIri) {
90103
}
91104

92105
for (Element element : document.children()) {
93-
processElement(element, new RDFaEvaluationContext(baseIri), baseIri);
106+
processElement(element, new RDFaEvaluationContext(baseIri));
94107
}
95108
}
96109

@@ -103,7 +116,6 @@ private void processDocument(Document document, IRI baseIri) {
103116
* @see <a href="https://www.w3.org/TR/rdfa-syntax/#s_rdfaindetail">RDFa processing in detail</a>
104117
*/
105118
private void processElement(Element element, RDFaEvaluationContext context, boolean recursive, boolean skipElement) {
106-
logger.debug("processElement({}, {}, ...)", element, context);
107119

108120
// 1. First, the local values are initialized
109121
Resource newSubject = null;
@@ -122,7 +134,6 @@ private void processElement(Element element, RDFaEvaluationContext context, bool
122134
if (attribute.getKey().startsWith(XMLNS_PREFIX)) {
123135
String prefixName = attribute.localName();
124136
IRI prefixNamespace = getValueFactory().createIRI(attribute.getValue(), "");
125-
logger.debug("Mapping: {} = {}", prefixName, prefixNamespace.stringValue());
126137
context.addUriMapping(prefixName, prefixNamespace);
127138
}
128139
}
@@ -140,25 +151,21 @@ private void processElement(Element element, RDFaEvaluationContext context, bool
140151
Optional<Resource> newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context);
141152
if (newSubjectResource.isPresent()) {
142153
newSubject = newSubjectResource.get();
143-
logger.debug("@about found: {}", newSubjectResource.get().stringValue());
144154
}
145155
} else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing.
146156
Optional<Resource> newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context);
147157
if (newSubjectResource.isPresent()) {
148158
newSubject = newSubjectResource.get();
149-
logger.debug("@src found: {}", newSubjectResource.get().stringValue());
150159
}
151160
} else if (element.attribute(RESOURCE_ATTR) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing;
152161
Optional<Resource> newSubjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context);
153162
if (newSubjectResource.isPresent()) {
154163
newSubject = newSubjectResource.get();
155-
logger.debug("@resource found: {}", newSubjectResource.get().stringValue());
156164
}
157165
} else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing.
158166
Optional<Resource> newSubjectResource = getResourceFromElementAttribute(element, HREF_ATTR, context);
159167
if (newSubjectResource.isPresent()) {
160168
newSubject = newSubjectResource.get();
161-
logger.debug("href found: {}", newSubjectResource.get());
162169
}
163170
} else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above;
164171
newSubject = context.baseIri();
@@ -176,13 +183,11 @@ private void processElement(Element element, RDFaEvaluationContext context, bool
176183
Optional<Resource> newSubjectResource = getResourceFromElementAttribute(element, ABOUT_ATTR, context);
177184
if (newSubjectResource.isPresent()) {
178185
newSubject = newSubjectResource.get();
179-
logger.debug("@about found: {}", newSubjectResource.get());
180186
}
181187
} else if (element.attribute(SRC_ATTR) != null) { // otherwise, by using the URI from @src, if present, obtained according to the section on CURIE and URI Processing.
182188
Optional<Resource> newSubjectResource = getResourceFromElementAttribute(element, SRC_ATTR, context);
183189
if (newSubjectResource.isPresent()) {
184190
newSubject = newSubjectResource.get();
185-
logger.debug("@src found: {}", newSubjectResource.get());
186191
}
187192
} else if (element.nameIs("body") || element.nameIs("head")) { // if the element is the head or body element then act as if there is an empty @about present, and process it according to the rule for @about, above;
188193
newSubject = context.baseIri();
@@ -197,22 +202,15 @@ private void processElement(Element element, RDFaEvaluationContext context, bool
197202
Optional<Resource> newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context);
198203
if (newObjectResource.isPresent()) {
199204
currentObject = newObjectResource.get();
200-
logger.debug("@resource found: {}", newObjectResource.get().stringValue());
201205
}
202206
} else if (element.attribute(HREF_ATTR) != null) { // otherwise, by using the URI from @href, if present, obtained according to the section on CURIE and URI Processing.
203207
Optional<Resource> newObjectResource = getResourceFromElementAttribute(element, RESOURCE_ATTR, context);
204208
if (newObjectResource.isPresent()) {
205209
currentObject = newObjectResource.get();
206-
logger.debug("href found: {}", newObjectResource.get().stringValue());
207210
}
208211
}
209212
}
210213

211-
if (newSubject != null)
212-
logger.debug("New subject resolved to {}", newSubject.stringValue());
213-
if(currentObject != null)
214-
logger.debug("Current object resolved to {}", currentObject.stringValue());
215-
216214
// 6. If in any of the previous steps a [new subject] was set to a non-null value, it is now used to provide a subject for type values;
217215
if(newSubject != null) {
218216
if(element.attribute(TYPEOF_ATTR) != null) { // One or more 'types' for the [new subject] can be set by using @typeof. If present, the attribute must contain one or more URIs, obtained according to the section on URI and CURIE Processing, each of which is used to generate a triple as follows:
@@ -271,7 +269,6 @@ private void processElement(Element element, RDFaEvaluationContext context, bool
271269
Optional<Resource> propertyOpt = getResourceFromElementAttribute(element, PROPERTY_ATTR, context);
272270
if(propertyOpt.isPresent() && propertyOpt.get().isIRI()) {
273271
IRI property = (IRI)propertyOpt.get();
274-
logger.debug("Property found: {}", property.stringValue());
275272

276273
IRI datatype = null;
277274
if(element.attribute(DATATYPE_ATTR) != null && ! element.attr(DATATYPE_ATTR).isEmpty()) {
@@ -285,18 +282,14 @@ private void processElement(Element element, RDFaEvaluationContext context, bool
285282
value = element.attr(CONTENT_ATTR);
286283
}
287284
if(datatype != null) {
288-
logger.debug("Literal value: {}, datatype: {}", value, datatype.stringValue());
289285
currentObjectLiteral = this.getValueFactory().createLiteral(value, datatype);
290286
recursive = false;
291287
} else if(language != null) {
292-
logger.debug("Literal value: {}, language: {}", value, language);
293288
currentObjectLiteral = this.getValueFactory().createLiteral(value, language);
294289
} else {
295-
logger.debug("Literal value: {}", value);
296290
currentObjectLiteral = this.getValueFactory().createLiteral(value);
297291
}
298292

299-
logger.debug("Adding {} {} {} {}", newSubject.stringValue(), property.stringValue(), currentObjectLiteral.getLabel(), currentObjectLiteral.getDatatype().stringValue());
300293
this.getModel().add(newSubject, property, currentObjectLiteral);
301294
}
302295
}
@@ -341,11 +334,10 @@ private void processElement(Element element, RDFaEvaluationContext context, bool
341334
/**
342335
* Overloaded function that initializes the flags, subject and object to their initial values for processing
343336
*
344-
* @param element
345-
* @param context
346-
* @param newSubject
337+
* @param element HTML element
338+
* @param context current evaluation context
347339
*/
348-
private void processElement(Element element, RDFaEvaluationContext context, Resource newSubject) {
340+
private void processElement(Element element, RDFaEvaluationContext context) {
349341
processElement(element, context, true, false);
350342
}
351343

@@ -413,7 +405,6 @@ private Optional<Resource> getResourceFromElementAttribute(Element element, Stri
413405
if (element.attribute(attributeName) != null) { // otherwise, by using the URI from @resource, if present, obtained according to the section on CURIE and URI Processing;
414406
String newSubjectString = element.attr(attributeName);
415407
return resolveStringResource(newSubjectString, context);
416-
417408
}
418409
return Optional.empty();
419410
}

src/main/java/fr/inria/corese/core/next/impl/temp/CoreseModel.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ public boolean add(Resource subject, IRI predicate, Value object, Resource... co
148148

149149
@Override
150150
public boolean contains(Resource subject, IRI predicate, Value object, Resource... contexts) {
151-
152151
Node subjectNode = converter.toCoreseNode(subject);
153152
Node predicateNode = converter.toCoreseNode(predicate);
154153
Node objectNode = converter.toCoreseNode(object);

0 commit comments

Comments
 (0)