@@ -758,6 +758,28 @@ class << self
758758 alias fromRDF fromRdf
759759 end
760760
##
# Registry of recognized script content types. Each key is a content-type
# string; each value is a callable that decodes the text of a matching
# script element, and is expected to produce a hash or an array of hashes.
#
# The built-in `application/ld+json` loader validates the content when
# `options[:validate]` is set, discards an `:adapter` option whose value is
# not listed in MUTLI_JSON_ADAPTERS, and forwards the remaining options to
# `MultiJson.load`.
#
# @return [Hash{String => Proc}]
SCRIPT_LOADERS = {
  'application/ld+json' => lambda { |content, url:, **options|
    validate_input(content, url: url) if options[:validate]
    # Forward every option except an :adapter value that is not recognized.
    parser_options = options.reject do |key, value|
      key == :adapter && !MUTLI_JSON_ADAPTERS.include?(value)
    end
    MultiJson.load(content, **parser_options)
  }
}
773+
##
# Registers a loader for a specific script content type, replacing any
# loader previously stored in SCRIPT_LOADERS under the same type.
#
# The loader is invoked as `loader.call(content, url: url, **options)`,
# so it must accept the script text positionally plus keyword arguments.
#
# @param [String] type
#   content type used as the registry key
# @param [Proc, #call] loader
#   callable that decodes the script content
# @return [Proc, #call] the loader that was registered
# @raise [ArgumentError] if `loader` does not respond to `call`
def self.add_script_loader(type, loader)
  # Fail at registration time rather than with a NoMethodError later,
  # when the loader would first be called.
  unless loader.respond_to?(:call)
    raise ArgumentError, "loader for #{type.inspect} must respond to #call"
  end

  SCRIPT_LOADERS[type] = loader
end
782+
761783 ##
762784 # Load one or more script tags from an HTML source.
763785 # Unescapes and uncomments input, returns the internal representation
@@ -812,47 +834,53 @@ def self.load_html(input, url:,
812834 element = input . at_xpath ( "//script[@id='#{ id } ']" )
813835 raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found with id=#{ id } " unless element
814836
815- unless element . attributes [ 'type' ] . to_s . start_with? ( 'application/ld+json' )
837+ script_type = SCRIPT_LOADERS . keys . detect { |type | element . attributes [ 'type' ] . to_s . start_with? ( type ) }
838+ unless script_type
816839 raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed ,
817840 "Script tag has type=#{ element . attributes [ 'type' ] } "
818841 end
819842
820- content = element . inner_html
821- validate_input ( content , url : url ) if options [ :validate ]
822- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
823- MultiJson . load ( content , **mj_opts )
843+ loader = SCRIPT_LOADERS [ script_type ]
844+ loader . call ( element . inner_html , url : url , **options )
824845 elsif extractAllScripts
825846 res = [ ]
826- elements = if profile
827- es = input . xpath ( "//script[starts-with(@type, 'application/ld+json;profile=#{ profile } ')]" )
828- # If no profile script, just take a single script without profile
829- es = [ input . at_xpath ( "//script[starts-with(@type, 'application/ld+json')]" ) ] . compact if es . empty?
830- es
831- else
832- input . xpath ( "//script[starts-with(@type, 'application/ld+json')]" )
833- end
834- elements . each do |element |
835- content = element . inner_html
836- validate_input ( content , url : url ) if options [ :validate ]
837- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
838- r = MultiJson . load ( content , **mj_opts )
839- if r . is_a? ( Hash )
840- res << r
841- elsif r . is_a? ( Array )
842- res . concat ( r )
847+
848+ SCRIPT_LOADERS . each do |type , loader |
849+ next unless res . empty? # Only load a single type
850+ elements = if profile
851+ es = input . xpath ( "//script[starts-with(@type, '#{ type } ;profile=#{ profile } ')]" )
852+ # If no profile script, just take a single script without profile
853+ es = [ input . at_xpath ( "//script[starts-with(@type, '#{ type } ')]" ) ] . compact if es . empty?
854+ es
855+ else
856+ input . xpath ( "//script[starts-with(@type, '#{ type } ')]" )
857+ end
858+ elements . each do |element |
859+ content = element . inner_html
860+ r = loader . call ( content , url : url , extractAllScripts : true , **options )
861+ if r . is_a? ( Hash )
862+ res << r
863+ elsif r . is_a? ( Array )
864+ res . concat ( r )
865+ end
843866 end
844867 end
845868 res
846869 else
847- # Find the first script with type application/ld+json.
848- element = input . at_xpath ( "//script[starts-with(@type, 'application/ld+json;profile=#{ profile } ')]" ) if profile
849- element ||= input . at_xpath ( "//script[starts-with(@type, 'application/ld+json')]" )
850- raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found" unless element
870+ # Find the first script with a known type
871+ script_type , element = nil , nil
872+ SCRIPT_LOADERS . keys . each do |type |
873+ next if script_type # already found the type
874+ element = input . at_xpath ( "//script[starts-with(@type, '#{ type } ;profile=#{ profile } ')]" ) if profile
875+ element ||= input . at_xpath ( "//script[starts-with(@type, '#{ type } ')]" )
876+ script_type = type if element
877+ end
878+ unless script_type
879+ raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found" unless element
880+ end
851881
852882 content = element . inner_html
853- validate_input ( content , url : url ) if options [ :validate ]
854- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
855- MultiJson . load ( content , **mj_opts )
883+ SCRIPT_LOADERS [ script_type ] . call ( content , url : url , **options )
856884 end
857885 rescue MultiJson ::ParseError => e
858886 raise JSON ::LD ::JsonLdError ::InvalidScriptElement , e . message
0 commit comments