@@ -42,6 +42,7 @@ const LRU = require('lru-cache');
4242const NQuads = require ( './NQuads' ) ;
4343const Rdfa = require ( './Rdfa' ) ;
4444
45+ const { prependBase : _prependBase } = require ( './url' ) ;
4546const { expand : _expand } = require ( './expand' ) ;
4647const { flatten : _flatten } = require ( './flatten' ) ;
4748const { fromRDF : _fromRDF } = require ( './fromRdf' ) ;
@@ -862,6 +863,9 @@ jsonld.documentLoader = async url => {
862863 * @param url the URL to fetch.
863864 * @param [options] the options to use:
864865 * [documentLoader] the document loader to use.
866+ * [extractAllScripts] true to keep all matching script elements, not just the first.
867+ * [profile] used when selecting from HTML script elements.
868+ * [requestProfile] one or more profile IRIs to use in the request.
865869 *
866870 * @return a Promise that resolves to the retrieved remote document.
867871 */
@@ -873,7 +877,10 @@ jsonld.get = async function(url, options) {
873877 load = jsonld . documentLoader ;
874878 }
875879
876- const remoteDoc = await load ( url ) ;
880+ // FIXME: unescape frag?
881+ const [ reference , frag ] = url . split ( '#' , 2 ) ;
882+
883+ const remoteDoc = await load ( reference , options ) ;
877884
878885 try {
879886 if ( ! remoteDoc . document ) {
@@ -882,16 +889,68 @@ jsonld.get = async function(url, options) {
882889 'jsonld.NullRemoteDocument' ) ;
883890 }
884891 if ( _isString ( remoteDoc . document ) ) {
885- remoteDoc . document = JSON . parse ( remoteDoc . document ) ;
892+ if ( remoteDoc . contentType && remoteDoc . contentType . includes ( 'text/html' ) ) {
893+ const domParser = new jsonld . domParser ( ) ;
894+ const dom = domParser . parseFromString ( remoteDoc . document ) ;
895+
896+ // Use any document base
897+ const baseElem = dom . getElementsByTagName ( 'base' ) ;
898+ if ( baseElem . length > 0 ) {
899+ const href = baseElem [ 0 ] . getAttribute ( 'href' ) ;
900+ options . base = _prependBase ( options . base || reference , href ) ;
901+ }
902+
903+ const scripts = dom . getElementsByTagName ( 'script' ) ;
904+ remoteDoc . document = [ ] ;
905+
906+ for ( let i = 0 ; i < scripts . length ; i ++ ) {
907+ const script = scripts [ i ] ;
908+ // only application/ld+json
909+ if ( ! script . getAttribute ( 'type' ) . startsWith ( 'application/ld+json' ) ) {
910+ continue ;
911+ }
912+ // If url has a fragment identifier, only matching scripts
913+ if ( frag && script . getAttribute ( 'id' ) !== frag ) {
914+ continue ;
915+ }
916+ try {
917+ remoteDoc . document . push ( JSON . parse ( script . textContent ) ) ;
918+ } catch ( e ) {
919+ throw new JsonLdError (
920+ 'Illegal script content.' ,
921+ 'jsonld.InvalidScriptElement' , {
922+ code : 'invalid script element' ,
923+ remoteDoc
924+ } ) ;
925+ }
926+ }
927+ if ( frag && remoteDoc . document . length === 0 ) {
928+ throw new JsonLdError (
929+ 'No script tag found with id=${frag}.' ,
930+ 'jsonld.InvalidScriptElement' , {
931+ code : 'invalid script element' ,
932+ remoteDoc
933+ } ) ;
934+ }
935+ if ( ! options . extractAllScripts ) {
936+ remoteDoc . document = remoteDoc . document [ 0 ] ;
937+ }
938+ } else {
939+ remoteDoc . document = JSON . parse ( remoteDoc . document ) ;
940+ }
886941 }
887942 } catch ( e ) {
888- throw new JsonLdError (
889- 'Could not retrieve a JSON-LD document from the URL.' ,
890- 'jsonld.LoadDocumentError' , {
891- code : 'loading document failed' ,
892- cause : e ,
893- remoteDoc
894- } ) ;
943+ if ( e . name === 'jsonld.InvalidScriptElement' ) {
944+ throw ( e )
945+ } else {
946+ throw new JsonLdError (
947+ 'Could not retrieve a JSON-LD document from the URL.' ,
948+ 'jsonld.LoadDocumentError' , {
949+ code : 'loading document failed' ,
950+ cause : e ,
951+ remoteDoc
952+ } ) ;
953+ }
895954 }
896955
897956 return remoteDoc ;
@@ -942,6 +1001,20 @@ jsonld.documentLoaders = {};
9421001jsonld . documentLoaders . node = require ( './documentLoaders/node' ) ;
9431002jsonld . documentLoaders . xhr = require ( './documentLoaders/xhr' ) ;
9441003
// Optional DOM parser. HTML handling relies on the `xmldom` package; when it
// cannot be loaded, a stand-in class is installed whose `parseFromString`
// always throws, so the failure only surfaces if HTML parsing is attempted.
try {
  const {DOMParser} = require('xmldom');
  jsonld.domParser = DOMParser;
} catch(e) {
  // Any failure while loading `xmldom` lands here; the error itself is unused.
  class NoDOMParser {
    /**
     * Always fails because no DOM implementation is available.
     *
     * @throws a JsonLdError constructed with 'jsonld.LoadDocumentError'.
     */
    parseFromString() {
      const details = {code: 'loading document failed'};
      throw new JsonLdError(
        'Could not parse HTML document. HTML parsing not implemented.',
        'jsonld.LoadDocumentError',
        details);
    }
  }
  jsonld.domParser = NoDOMParser;
}
1017+
9451018/**
9461019 * Assigns the default document loader for external document URLs to a built-in
9471020 * default. Supported types currently include: 'xhr' and 'node'.
0 commit comments