Skip to content

Commit 4e2fcaa

Browse files
committed
Add Context::PRELOADED and Context.add_preloaded. When parsing a context that would otherwise be loaded remotely, use the entry in PRELOADED if one exists instead of fetching it. This avoids potentially expensive context loading and parsing for common cases.
Addresses #29.
1 parent f7b0952 commit 4e2fcaa

File tree

3 files changed

+79
-29
lines changed

3 files changed

+79
-29
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ Then, when performing something like expansion:
240240

241241
JSON::LD::API.expand(input, documentLoader: load_document_local)
242242

243+
244+
## Preloading contexts
245+
In many cases, particularly for small documents, processing time is dominated by loading and parsing remote contexts. For example, a small schema.org document may need to download a large context and turn it into an internal representation before the document itself can be expanded. Using `JSON::LD::Context.add_preloaded`, an implementation can perform this loading up front and make the parsed context available to the processor.
246+
247+
ctx = JSON::LD::Context.new().parse('http://schema.org/')
248+
JSON::LD::Context.add_preloaded('http://schema.org/', ctx)
249+
243250
## RDF Reader and Writer
244251
{JSON::LD} also acts as a normal RDF reader and writer, using the standard RDF.rb reader/writer interfaces:
245252

lib/json/ld/context.rb

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,18 @@ class Context
66
include Utils
77
include RDF::Util::Logger
88

9+
##
10+
# Preloaded contexts.
11+
# To avoid runtime context parsing and downloading, contexts may be pre-loaded by implementations.
12+
# @return [Hash{Symbol => Context}]
13+
PRELOADED = {}
14+
15+
class << self
16+
def add_preloaded(url, context)
17+
PRELOADED[url.to_sym] = context
18+
end
19+
end
20+
921
# Term Definitions specify how properties and values have to be interpreted as well as the current vocabulary mapping and the default language
1022
class TermDefinition
1123
# @return [RDF::URI] IRI map
@@ -279,9 +291,11 @@ def parse(local_context, remote_contexts = [])
279291
end
280292
when String, RDF::URI
281293
log_debug("parse") {"remote: #{context}, base: #{result.context_base || result.base}"}
282-
# Load context document, if it is a string
294+
283295
# 3.2.1) Set context to the result of resolving value against the base IRI which is established as specified in section 5.1 Establishing a Base URI of [RFC3986]. Only the basic algorithm in section 5.2 of [RFC3986] is used; neither Syntax-Based Normalization nor Scheme-Based Normalization are performed. Characters additionally allowed in IRI references are treated in the same way that unreserved characters are treated in URI references, per section 6.5 of [RFC3987].
284296
context = RDF::URI(result.context_base || result.base).join(context)
297+
context_canon = RDF::URI(context).canonicalize
298+
context_canon.scheme = 'http' if context_canon.scheme == 'https'
285299

286300
raise JsonLdError::RecursiveContextInclusion, "#{context}" if remote_contexts.include?(context.to_s)
287301
remote_contexts << context.to_s
@@ -290,34 +304,42 @@ def parse(local_context, remote_contexts = [])
290304
context_no_base.base = nil
291305
context_no_base.context_base = context.to_s
292306

293-
begin
294-
context_opts = @options.dup
295-
context_opts.delete(:headers)
296-
@options[:documentLoader].call(context.to_s, context_opts) do |remote_doc|
297-
# 3.2.5) Dereference context. If the dereferenced document has no top-level JSON object with an @context member, an invalid remote context has been detected and processing is aborted; otherwise, set context to the value of that member.
298-
jo = case remote_doc.document
299-
when String then MultiJson.load(remote_doc.document)
300-
else remote_doc.document
301-
end
302-
raise JsonLdError::InvalidRemoteContext, "#{context}" unless jo.is_a?(Hash) && jo.has_key?('@context')
303-
context = jo['@context']
304-
if @options[:processingMode] == "json-ld-1.0"
305-
context_no_base.provided_context = context.dup
307+
if PRELOADED[context_canon.to_s.to_sym]
308+
# If we have a cached context, merge it into the current context (result) and use as the new context
309+
log_debug("parse") {"=> cached_context: #{context_canon.to_s.inspect}"}
310+
context = context_no_base.merge!(PRELOADED[context_canon.to_s.to_sym])
311+
else
312+
313+
# Load context document, if it is a string
314+
begin
315+
context_opts = @options.dup
316+
context_opts.delete(:headers)
317+
@options[:documentLoader].call(context.to_s, context_opts) do |remote_doc|
318+
# 3.2.5) Dereference context. If the dereferenced document has no top-level JSON object with an @context member, an invalid remote context has been detected and processing is aborted; otherwise, set context to the value of that member.
319+
jo = case remote_doc.document
320+
when String then MultiJson.load(remote_doc.document)
321+
else remote_doc.document
322+
end
323+
raise JsonLdError::InvalidRemoteContext, "#{context}" unless jo.is_a?(Hash) && jo.has_key?('@context')
324+
context = jo['@context']
325+
if @options[:processingMode] == "json-ld-1.0"
326+
context_no_base.provided_context = context.dup
327+
end
306328
end
329+
rescue JsonLdError::LoadingDocumentFailed => e
330+
log_debug("parse") {"Failed to retrieve @context from remote document at #{context_no_base.context_base.inspect}: #{e.message}"}
331+
raise JsonLdError::LoadingRemoteContextFailed, "#{context_no_base.context_base}", e.backtrace
332+
rescue JsonLdError
333+
raise
334+
rescue Exception => e
335+
log_debug("parse") {"Failed to retrieve @context from remote document at #{context_no_base.context_base.inspect}: #{e.message}"}
336+
raise JsonLdError::LoadingRemoteContextFailed, "#{context_no_base.context_base}", e.backtrace
307337
end
308-
rescue JsonLdError::LoadingDocumentFailed => e
309-
log_debug("parse") {"Failed to retrieve @context from remote document at #{context_no_base.context_base.inspect}: #{e.message}"}
310-
raise JsonLdError::LoadingRemoteContextFailed, "#{context_no_base.context_base}", e.backtrace
311-
rescue JsonLdError
312-
raise
313-
rescue Exception => e
314-
log_debug("parse") {"Failed to retrieve @context from remote document at #{context_no_base.context_base.inspect}: #{e.message}"}
315-
raise JsonLdError::LoadingRemoteContextFailed, "#{context_no_base.context_base}", e.backtrace
316-
end
317338

318-
# 3.2.6) Set context to the result of recursively calling this algorithm, passing context no base for active context, context for local context, and remote contexts.
319-
context = context_no_base.parse(context, remote_contexts.dup)
320-
context.provided_context = result.provided_context
339+
# 3.2.6) Set context to the result of recursively calling this algorithm, passing context no base for active context, context for local context, and remote contexts.
340+
context = context_no_base.parse(context, remote_contexts.dup)
341+
context.provided_context = result.provided_context
342+
end
321343
context.base ||= result.base
322344
result = context
323345
log_debug("parse") {"=> provided_context: #{context.inspect}"}
@@ -1314,9 +1336,9 @@ def remove_base(iri)
13141336
b = base.to_s
13151337
return iri[b.length..-1] if iri.start_with?(b) && %w(? #).include?(iri[b.length, 1])
13161338

1317-
@base_and_parents.each_with_index do |b, index|
1318-
next unless iri.start_with?(b)
1319-
rel = "../" * index + iri[b.length..-1]
1339+
@base_and_parents.each_with_index do |bb, index|
1340+
next unless iri.start_with?(bb)
1341+
rel = "../" * index + iri[bb.length..-1]
13201342
return rel.empty? ? "./" : rel
13211343
end
13221344
iri

spec/context_spec.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,27 @@ def containers
9292
expect(ec.provided_context).to produce(ctx, logger)
9393
end
9494
end
95+
96+
context "pre-loaded remote" do
97+
let(:ctx) {"http://example.com/preloaded"}
98+
before(:all) {
99+
JSON::LD::Context.add_preloaded("http://example.com/preloaded",
100+
JSON::LD::Context.new().parse({'foo' => "http://example.com/"})
101+
)}
102+
after(:all) {JSON::LD::Context::PRELOADED.clear}
103+
104+
it "does not load referenced context" do
105+
expect(JSON::LD::API).not_to receive(:documentLoader).with(ctx, anything)
106+
ec = subject.parse(ctx)
107+
end
108+
109+
it "uses loaded context" do
110+
ec = subject.parse(ctx)
111+
expect(ec.send(:mappings)).to produce({
112+
"foo" => "http://example.com/"
113+
}, logger)
114+
end
115+
end
95116
end
96117

97118
context "Array" do

0 commit comments

Comments
 (0)