@@ -22,6 +22,9 @@ import (
2222 "net/url"
2323 "os"
2424 "regexp"
25+ "time"
26+
27+ "github.com/pquerna/cachecontrol"
2528)
2629
2730const (
@@ -256,3 +259,134 @@ func (cdl *CachingDocumentLoader) PreloadWithMapping(urlMap map[string]string) e
256259 }
257260 return nil
258261}
262+
263+ type cachedRemoteDocument struct {
264+ remoteDocument * RemoteDocument
265+ expireTime time.Time
266+ neverExpires bool
267+ }
268+
269+ // RFC7324CachingDocumentLoader respects RFC7324 caching headers in order to
270+ // cache effectively
271+ type RFC7324CachingDocumentLoader struct {
272+ httpClient * http.Client
273+ cache map [string ]* cachedRemoteDocument
274+ }
275+
276+ // NewRFC7324CachingDocumentLoader creates a new RFC7324CachingDocumentLoader
277+ func NewRFC7324CachingDocumentLoader (httpClient * http.Client ) * RFC7324CachingDocumentLoader {
278+ rval := & RFC7324CachingDocumentLoader {
279+ httpClient : httpClient ,
280+ cache : make (map [string ]* cachedRemoteDocument ),
281+ }
282+
283+ if httpClient == nil {
284+ rval .httpClient = http .DefaultClient
285+ }
286+
287+ return rval
288+ }
289+
290+ // LoadDocument returns a RemoteDocument containing the contents of the JSON resource
291+ // from the given URL.
292+ func (rcdl * RFC7324CachingDocumentLoader ) LoadDocument (u string ) (* RemoteDocument , error ) {
293+ entry , ok := rcdl .cache [u ]
294+ now := time .Now ()
295+
296+ // First we check if we hit in the cache, and the cache entry is valid
297+ // We need to check if expireTime >= now, so we negate the comparison below
298+ if ok && (entry .neverExpires || ! entry .expireTime .Before (now )) {
299+ return entry .remoteDocument , nil
300+ }
301+
302+ parsedURL , err := url .Parse (u )
303+ if err != nil {
304+ return nil , NewJsonLdError (LoadingDocumentFailed , err )
305+ }
306+
307+ var documentBody io.Reader
308+ var finalURL , contextURL string
309+
310+ // We use neverExpires, shouldCache, and expireTime at the end of this method
311+ // to create an object to store in the cache. Set them to sane default values now
312+ neverExpires := false
313+ shouldCache := false
314+ expireTime := time .Now ()
315+
316+ protocol := parsedURL .Scheme
317+ if protocol != "http" && protocol != "https" {
318+ // Can't use the HTTP client for those!
319+ finalURL = u
320+ var file * os.File
321+ file , err = os .Open (u )
322+ if err != nil {
323+ return nil , NewJsonLdError (LoadingDocumentFailed , err )
324+ }
325+ defer file .Close ()
326+ documentBody = file
327+ neverExpires = true
328+ shouldCache = true
329+ } else {
330+
331+ req , err := http .NewRequest ("GET" , u , nil )
332+ // We prefer application/ld+json, but fallback to application/json
333+ // or whatever is available
334+ req .Header .Add ("Accept" , acceptHeader )
335+
336+ res , err := rcdl .httpClient .Do (req )
337+
338+ if err != nil {
339+ return nil , NewJsonLdError (LoadingDocumentFailed , err )
340+ }
341+ defer res .Body .Close ()
342+
343+ if res .StatusCode != http .StatusOK {
344+ return nil , NewJsonLdError (LoadingDocumentFailed ,
345+ fmt .Sprintf ("Bad response status code: %d" , res .StatusCode ))
346+ }
347+
348+ finalURL = res .Request .URL .String ()
349+
350+ contentType := res .Header .Get ("Content-Type" )
351+ linkHeader := res .Header .Get ("Link" )
352+
353+ if len (linkHeader ) > 0 && contentType != "application/ld+json" {
354+ header := ParseLinkHeader (linkHeader )[linkHeaderRel ]
355+ if len (header ) > 1 {
356+ return nil , NewJsonLdError (MultipleContextLinkHeaders , nil )
357+ } else if len (header ) == 1 {
358+ contextURL = header [0 ]["target" ]
359+ }
360+ }
361+
362+ reasons , resExpireTime , err := cachecontrol .CachableResponse (req , res , cachecontrol.Options {})
363+ // If there are no errors parsing cache headers and there are no reasons not to cache, then we cache
364+ if err == nil && len (reasons ) == 0 {
365+ shouldCache = true
366+ expireTime = resExpireTime
367+ }
368+
369+ documentBody = res .Body
370+ }
371+ if err != nil {
372+ return nil , NewJsonLdError (LoadingDocumentFailed , err )
373+ }
374+ document , err := DocumentFromReader (documentBody )
375+ if err != nil {
376+ return nil , err
377+ }
378+ remoteDoc := & RemoteDocument {DocumentURL : finalURL , Document : document , ContextURL : contextURL }
379+
380+ // If we went down a branch that marked shouldCache true then lets add the cache entry into
381+ // the cache
382+ if shouldCache {
383+ cacheEntry := & cachedRemoteDocument {
384+ remoteDocument : remoteDoc ,
385+ expireTime : expireTime ,
386+ neverExpires : neverExpires ,
387+ }
388+ rcdl .cache [u ] = cacheEntry
389+ }
390+
391+ return remoteDoc , nil
392+ }
0 commit comments