@@ -42,6 +42,7 @@ const LRU = require('lru-cache');
4242const NQuads = require ( './NQuads' ) ;
4343const Rdfa = require ( './Rdfa' ) ;
4444
45+ const { prependBase : _prependBase } = require ( './url' ) ;
4546const { expand : _expand } = require ( './expand' ) ;
4647const { flatten : _flatten } = require ( './flatten' ) ;
4748const { fromRDF : _fromRDF } = require ( './fromRdf' ) ;
@@ -854,6 +855,9 @@ jsonld.documentLoader = async url => {
854855 * @param url the URL to fetch.
855856 * @param [options] the options to use:
856857 * [documentLoader] the document loader to use.
858+ * [extractAllScripts] true to keep every matching script element (as an array); otherwise only the first match is used.
859+ * [profile] used when selecting from HTML script elements.
860+ * [requestProfile] one or more profile IRIs to use in the request.
857861 *
858862 * @return a Promise that resolves to the retrieved remote document.
859863 */
@@ -865,7 +869,10 @@ jsonld.get = async function(url, options) {
865869 load = jsonld . documentLoader ;
866870 }
867871
868- const remoteDoc = await load ( url ) ;
872+ // FIXME: unescape frag?
873+ const [ reference , frag ] = url . split ( '#' , 2 ) ;
874+
875+ const remoteDoc = await load ( reference , options ) ;
869876
870877 try {
871878 if ( ! remoteDoc . document ) {
@@ -874,16 +881,68 @@ jsonld.get = async function(url, options) {
874881 'jsonld.NullRemoteDocument' ) ;
875882 }
876883 if ( _isString ( remoteDoc . document ) ) {
877- remoteDoc . document = JSON . parse ( remoteDoc . document ) ;
884+ if ( remoteDoc . contentType && remoteDoc . contentType . includes ( 'text/html' ) ) {
885+ const domParser = new jsonld . domParser ( ) ;
886+ const dom = domParser . parseFromString ( remoteDoc . document ) ;
887+
888+ // Use any document base
889+ const baseElem = dom . getElementsByTagName ( 'base' ) ;
890+ if ( baseElem . length > 0 ) {
891+ const href = baseElem [ 0 ] . getAttribute ( 'href' ) ;
892+ options . base = _prependBase ( options . base || reference , href ) ;
893+ }
894+
895+ const scripts = dom . getElementsByTagName ( 'script' ) ;
896+ remoteDoc . document = [ ] ;
897+
898+ for ( let i = 0 ; i < scripts . length ; i ++ ) {
899+ const script = scripts [ i ] ;
900+ // only application/ld+json
901+ if ( ! script . getAttribute ( 'type' ) . startsWith ( 'application/ld+json' ) ) {
902+ continue ;
903+ }
904+ // If url has a fragment identifier, only matching scripts
905+ if ( frag && script . getAttribute ( 'id' ) !== frag ) {
906+ continue ;
907+ }
908+ try {
909+ remoteDoc . document . push ( JSON . parse ( script . textContent ) ) ;
910+ } catch ( e ) {
911+ throw new JsonLdError (
912+ 'Illegal script content.' ,
913+ 'jsonld.InvalidScriptElement' , {
914+ code : 'invalid script element' ,
915+ remoteDoc
916+ } ) ;
917+ }
918+ }
919+ if ( frag && remoteDoc . document . length === 0 ) {
920+ throw new JsonLdError (
921+ 'No script tag found with id=${frag}.' ,
922+ 'jsonld.InvalidScriptElement' , {
923+ code : 'invalid script element' ,
924+ remoteDoc
925+ } ) ;
926+ }
927+ if ( ! options . extractAllScripts ) {
928+ remoteDoc . document = remoteDoc . document [ 0 ] ;
929+ }
930+ } else {
931+ remoteDoc . document = JSON . parse ( remoteDoc . document ) ;
932+ }
878933 }
879934 } catch ( e ) {
880- throw new JsonLdError (
881- 'Could not retrieve a JSON-LD document from the URL.' ,
882- 'jsonld.LoadDocumentError' , {
883- code : 'loading document failed' ,
884- cause : e ,
885- remoteDoc
886- } ) ;
935+ if ( e . name === 'jsonld.InvalidScriptElement' ) {
936+ throw ( e )
937+ } else {
938+ throw new JsonLdError (
939+ 'Could not retrieve a JSON-LD document from the URL.' ,
940+ 'jsonld.LoadDocumentError' , {
941+ code : 'loading document failed' ,
942+ cause : e ,
943+ remoteDoc
944+ } ) ;
945+ }
887946 }
888947
889948 return remoteDoc ;
@@ -934,6 +993,20 @@ jsonld.documentLoaders = {};
934993jsonld . documentLoaders . node = require ( './documentLoaders/node' ) ;
935994jsonld . documentLoaders . xhr = require ( './documentLoaders/xhr' ) ;
936995
// Optional DOM parser: use the `DOMParser` exported by `xmldom` when that
// module can be loaded. Otherwise install a placeholder class whose
// `parseFromString` always throws a `jsonld.LoadDocumentError`.
jsonld.domParser = (() => {
  try {
    return require('xmldom').DOMParser;
  } catch(e) {
    // any failure while loading `xmldom` selects the placeholder below
    return class NoDOMParser {
      /**
       * Placeholder used when no DOM parser is available.
       *
       * @throws JsonLdError always, with code 'loading document failed'.
       */
      parseFromString() {
        const message =
          'Could not parse HTML document. ' +
          'HTML parsing not implemented.';
        throw new JsonLdError(
          message, 'jsonld.LoadDocumentError',
          {code: 'loading document failed'});
      }
    };
  }
})();
1009+
9371010/**
9381011 * Assigns the default document loader for external document URLs to a built-in
9391012 * default. Supported types currently include: 'xhr' and 'node'.
0 commit comments