Skip to content

Commit 864a748

Browse files
committed
Fix conflict between nested tag-capturing and non-capturing literals
Closes #49
1 parent da8c2f6 commit 864a748

File tree

3 files changed

+128
-16
lines changed

3 files changed

+128
-16
lines changed

lib/IActiveTag.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ export interface IActiveTag {
1212
explicitNewSubject?: boolean;
1313
predicates?: RDF.NamedNode[];
1414
object?: RDF.NamedNode | RDF.BlankNode | boolean;
15-
text?: string[];
15+
textWithTags?: string[];
16+
textWithoutTags?: string[];
1617
vocab?: string;
1718
language?: string;
1819
datatype?: RDF.NamedNode;
1920
collectChildTags?: boolean;
21+
collectChildTagsForCurrentTag?: boolean;
2022
collectedPatternTag?: IRdfaPattern;
2123
interpretObjectAsTime?: boolean;
2224
incompleteTriples?: { predicate: RDF.Quad_Predicate, reverse: boolean, list?: boolean }[];

lib/RdfaParser.ts

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
104104
// Create a new active tag and inherit language scope and baseIRI from parent
105105
const activeTag: IActiveTag = {
106106
collectChildTags: parentTag.collectChildTags,
107+
collectChildTagsForCurrentTag: parentTag.collectChildTagsForCurrentTag,
107108
incompleteTriples: [],
108109
inlist: 'inlist' in attributes,
109110
listMapping: <{[predicate: string]: (RDF.Term|boolean)[]}> <any> [],
@@ -129,7 +130,7 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
129130
}
130131

131132
const attributesSerialized = Object.keys(attributes).map((key) => `${key}="${attributes[key]}"`).join(' ');
132-
activeTag.text = [`<${name}${attributesSerialized ? ' ' + attributesSerialized : ''}>`];
133+
activeTag.textWithTags = [`<${name}${attributesSerialized ? ' ' + attributesSerialized : ''}>`];
133134
if (this.features.skipHandlingXmlLiteralChildren) {
134135
return;
135136
}
@@ -469,6 +470,7 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
469470
&& (activeTag.datatype.value === Util.RDF + 'XMLLiteral'
470471
|| (this.features.htmlDatatype && activeTag.datatype.value === Util.RDF + 'HTML'))) {
471472
activeTag.collectChildTags = true;
473+
activeTag.collectChildTagsForCurrentTag = true;
472474
}
473475
} else {
474476
// Try to determine resource
@@ -488,6 +490,13 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
488490
}
489491
}
490492

493+
// If we're in a parent tag that collects child tags,
494+
// and we find a tag that does NOT preserve tags,
495+
// we mark this tag (and its children) to not preserve them.
496+
if (!('datatype' in attributes) || attributes.datatype === '') {
497+
activeTag.collectChildTagsForCurrentTag = false;
498+
}
499+
491500
if ('content' in attributes) {
492501
// Emit triples based on content attribute has preference over text content
493502
const object = this.util.createLiteral(attributes.content, activeTag);
@@ -586,10 +595,14 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
586595
}
587596

588597
// Save the text inside the active tag
589-
if (!activeTag.text) {
590-
activeTag.text = [];
598+
if (!activeTag.textWithTags) {
599+
activeTag.textWithTags = [];
591600
}
592-
activeTag.text.push(data);
601+
if (!activeTag.textWithoutTags) {
602+
activeTag.textWithoutTags = [];
603+
}
604+
activeTag.textWithTags.push(data);
605+
activeTag.textWithoutTags.push(data);
593606
}
594607

595608
public onTagClose() {
@@ -628,10 +641,15 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
628641
// Emit all triples that were determined in the active tag
629642
if (activeTag.predicates) {
630643
const subject = this.util.getResourceOrBaseIri(activeTag.subject, activeTag);
631-
let textSegments: string[] = activeTag.text || [];
632-
if (activeTag.collectChildTags && parentTag.collectChildTags) {
633-
// If we are inside an XMLLiteral child that also has RDFa content, ignore the tag name that was collected.
634-
textSegments = textSegments.slice(1);
644+
let textSegments: string[];
645+
if (!activeTag.collectChildTagsForCurrentTag) {
646+
textSegments = activeTag.textWithoutTags || [];
647+
} else {
648+
textSegments = activeTag.textWithTags || [];
649+
if (activeTag.collectChildTags && parentTag.collectChildTags) {
650+
// If we are inside an XMLLiteral child that also has RDFa content, ignore the tag name that was collected.
651+
textSegments = textSegments.slice(1);
652+
}
635653
}
636654
const object = this.util.createLiteral(textSegments.join(''), activeTag);
637655
if (activeTag.inlist) {
@@ -646,7 +664,8 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
646664

647665
// Reset text, unless the parent is also collecting text
648666
if (!parentTag.predicates) {
649-
activeTag.text = null;
667+
activeTag.textWithoutTags = null;
668+
activeTag.textWithTags = null;
650669
}
651670
}
652671

@@ -683,16 +702,23 @@ export class RdfaParser extends Transform implements RDF.Sink<EventEmitter, RDF.
683702
this.activeTagStack.pop();
684703

685704
// Save the tag contents if needed
686-
if (activeTag.collectChildTags && activeTag.text) {
687-
activeTag.text.push(`</${activeTag.name}>`);
705+
if (activeTag.collectChildTags && activeTag.textWithTags) {
706+
activeTag.textWithTags.push(`</${activeTag.name}>`);
688707
}
689708

690709
// If we still have text contents, try to append it to the parent tag
691-
if (activeTag.text && parentTag) {
692-
if (!parentTag.text) {
693-
parentTag.text = activeTag.text;
710+
if (activeTag.textWithTags && parentTag) {
711+
if (!parentTag.textWithTags) {
712+
parentTag.textWithTags = activeTag.textWithTags;
713+
} else {
714+
parentTag.textWithTags = parentTag.textWithTags.concat(activeTag.textWithTags);
715+
}
716+
}
717+
if (activeTag.textWithoutTags && parentTag) {
718+
if (!parentTag.textWithoutTags) {
719+
parentTag.textWithoutTags = activeTag.textWithoutTags;
694720
} else {
695-
parentTag.text = parentTag.text.concat(activeTag.text);
721+
parentTag.textWithoutTags = parentTag.textWithoutTags.concat(activeTag.textWithoutTags);
696722
}
697723
}
698724
}

test/RdfParser-test.ts

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3708,6 +3708,23 @@ prefix="dc: http://purl.org/dc/elements/1.1/">
37083708
]);
37093709
});
37103710

3711+
it('@about and empty @datatype should force string literal value with sub-tags', async () => {
3712+
return expect(await parse(parser, `<html xmlns="http://www.w3.org/1999/xhtml">
3713+
<head>
3714+
<title>Test 0290</title>
3715+
</head>
3716+
<body>
3717+
<h1>@href becomes subject when @property and @datatype are present</h1>
3718+
<p about="http://example.org/" property="rdf:value" datatype="">value <em>bar</em></p>
3719+
</body>
3720+
</html>`))
3721+
.toBeRdfIsomorphic([
3722+
quad('http://example.org/',
3723+
'http://www.w3.org/1999/02/22-rdf-syntax-ns#value',
3724+
'"value bar"'),
3725+
]);
3726+
});
3727+
37113728
it('@href and empty @datatype should force string literal value', async () => {
37123729
return expect(await parse(parser, `<html>
37133730
<head>
@@ -3725,6 +3742,73 @@ prefix="dc: http://purl.org/dc/elements/1.1/">
37253742
]);
37263743
});
37273744

3745+
it('@href and empty @datatype should force string literal value with sub-tags', async () => {
3746+
return expect(await parse(parser, `<html>
3747+
<head>
3748+
<title>Test 0290</title>
3749+
</head>
3750+
<body>
3751+
<h1>@href becomes subject when @property and @datatype are present</h1>
3752+
<a href="http://example.org/" property="rdf:value" datatype="">value <em>bar</em></a>
3753+
</body>
3754+
</html>`))
3755+
.toBeRdfIsomorphic([
3756+
quad('http://example.org/',
3757+
'http://www.w3.org/1999/02/22-rdf-syntax-ns#value',
3758+
'"value bar"'),
3759+
]);
3760+
});
3761+
3762+
it('rdf:HTML datatype and empty @datatype should nestable', async () => {
3763+
return expect(await parse(parser, `<html>
3764+
<head>
3765+
<title>Test dummy</title>
3766+
</head>
3767+
<body>
3768+
<h1>@href becomes subject when @property and @datatype are present</h1>
3769+
<div datatype="rdf:HTML" property="schema:description">
3770+
<a href="http://example.org/" property="rdf:value" datatype="">value <em>bar</em></a>
3771+
</div>
3772+
</body>
3773+
</html>`))
3774+
.toBeRdfIsomorphic([
3775+
quad('http://example.org/',
3776+
'http://www.w3.org/1999/02/22-rdf-syntax-ns#value',
3777+
'"value bar"'),
3778+
quad('http://example.org/',
3779+
'http://schema.org/description',
3780+
'"\n <a href="http://example.org/" property="rdf:value" datatype="">value <em>bar</em></a>\n "^^http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML'),
3781+
]);
3782+
});
3783+
3784+
it('rdf:HTML datatype and empty @datatype should nestable recursively', async () => {
3785+
return expect(await parse(parser, `<html>
3786+
<head>
3787+
<title>Test dummy</title>
3788+
</head>
3789+
<body>
3790+
<h1>@href becomes subject when @property and @datatype are present</h1>
3791+
<div datatype="rdf:HTML" property="schema:description">
3792+
<a href="http://example.org/" property="rdf:value" datatype="">value <em>bar</em><div datatype="rdf:HTML" property="schema:description2"><a href="http://example.org/" property="rdf:value2" datatype="">value2 <em>bar2</em></a></div></a>
3793+
</div>
3794+
</body>
3795+
</html>`))
3796+
.toBeRdfIsomorphic([
3797+
quad('http://example.org/',
3798+
'http://www.w3.org/1999/02/22-rdf-syntax-ns#value2',
3799+
'"value2 bar2"'),
3800+
quad('http://example.org/',
3801+
'http://schema.org/description2',
3802+
'"<a href="http://example.org/" property="rdf:value2" datatype="">value2 <em>bar2</em></a>"^^http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML'),
3803+
quad('http://example.org/',
3804+
'http://www.w3.org/1999/02/22-rdf-syntax-ns#value',
3805+
'"value barvalue2 bar2"'),
3806+
quad('http://example.org/',
3807+
'http://schema.org/description',
3808+
'"\n <a href="http://example.org/" property="rdf:value" datatype="">value <em>bar</em><div datatype="rdf:HTML" property="schema:description2"><a href="http://example.org/" property="rdf:value2" datatype="">value2 <em>bar2</em></a></div></a>\n "^^http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML'),
3809+
]);
3810+
});
3811+
37283812
it('@property does not set parent object without @typeof', async () => {
37293813
return expect(await parse(parser, `<html>
37303814
<head>

0 commit comments

Comments
 (0)