Skip to content

Commit dd05ddf

Browse files
authored
Merge pull request #7 from Koshroy/rfc7324-caching
RFC7324 Compliant Caching
2 parents e076851 + 9c1eb16 commit dd05ddf

File tree

1 file changed

+134
-0
lines changed

1 file changed

+134
-0
lines changed

ld/document_loader.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ import (
2222
"net/url"
2323
"os"
2424
"regexp"
25+
"time"
26+
27+
"github.com/pquerna/cachecontrol"
2528
)
2629

2730
const (
@@ -256,3 +259,134 @@ func (cdl *CachingDocumentLoader) PreloadWithMapping(urlMap map[string]string) e
256259
}
257260
return nil
258261
}
262+
263+
type cachedRemoteDocument struct {
264+
remoteDocument *RemoteDocument
265+
expireTime time.Time
266+
neverExpires bool
267+
}
268+
269+
// RFC7324CachingDocumentLoader respects RFC7324 caching headers in order to
270+
// cache effectively
271+
type RFC7324CachingDocumentLoader struct {
272+
httpClient *http.Client
273+
cache map[string]*cachedRemoteDocument
274+
}
275+
276+
// NewRFC7324CachingDocumentLoader creates a new RFC7324CachingDocumentLoader
277+
func NewRFC7324CachingDocumentLoader(httpClient *http.Client) *RFC7324CachingDocumentLoader {
278+
rval := &RFC7324CachingDocumentLoader{
279+
httpClient: httpClient,
280+
cache: make(map[string]*cachedRemoteDocument),
281+
}
282+
283+
if httpClient == nil {
284+
rval.httpClient = http.DefaultClient
285+
}
286+
287+
return rval
288+
}
289+
290+
// LoadDocument returns a RemoteDocument containing the contents of the JSON resource
291+
// from the given URL.
292+
func (rcdl *RFC7324CachingDocumentLoader) LoadDocument(u string) (*RemoteDocument, error) {
293+
entry, ok := rcdl.cache[u]
294+
now := time.Now()
295+
296+
// First we check if we hit in the cache, and the cache entry is valid
297+
// We need to check if expireTime >= now, so we negate the comparison below
298+
if ok && (entry.neverExpires || !entry.expireTime.Before(now)) {
299+
return entry.remoteDocument, nil
300+
}
301+
302+
parsedURL, err := url.Parse(u)
303+
if err != nil {
304+
return nil, NewJsonLdError(LoadingDocumentFailed, err)
305+
}
306+
307+
var documentBody io.Reader
308+
var finalURL, contextURL string
309+
310+
// We use neverExpires, shouldCache, and expireTime at the end of this method
311+
// to create an object to store in the cache. Set them to sane default values now
312+
neverExpires := false
313+
shouldCache := false
314+
expireTime := time.Now()
315+
316+
protocol := parsedURL.Scheme
317+
if protocol != "http" && protocol != "https" {
318+
// Can't use the HTTP client for those!
319+
finalURL = u
320+
var file *os.File
321+
file, err = os.Open(u)
322+
if err != nil {
323+
return nil, NewJsonLdError(LoadingDocumentFailed, err)
324+
}
325+
defer file.Close()
326+
documentBody = file
327+
neverExpires = true
328+
shouldCache = true
329+
} else {
330+
331+
req, err := http.NewRequest("GET", u, nil)
332+
// We prefer application/ld+json, but fallback to application/json
333+
// or whatever is available
334+
req.Header.Add("Accept", acceptHeader)
335+
336+
res, err := rcdl.httpClient.Do(req)
337+
338+
if err != nil {
339+
return nil, NewJsonLdError(LoadingDocumentFailed, err)
340+
}
341+
defer res.Body.Close()
342+
343+
if res.StatusCode != http.StatusOK {
344+
return nil, NewJsonLdError(LoadingDocumentFailed,
345+
fmt.Sprintf("Bad response status code: %d", res.StatusCode))
346+
}
347+
348+
finalURL = res.Request.URL.String()
349+
350+
contentType := res.Header.Get("Content-Type")
351+
linkHeader := res.Header.Get("Link")
352+
353+
if len(linkHeader) > 0 && contentType != "application/ld+json" {
354+
header := ParseLinkHeader(linkHeader)[linkHeaderRel]
355+
if len(header) > 1 {
356+
return nil, NewJsonLdError(MultipleContextLinkHeaders, nil)
357+
} else if len(header) == 1 {
358+
contextURL = header[0]["target"]
359+
}
360+
}
361+
362+
reasons, resExpireTime, err := cachecontrol.CachableResponse(req, res, cachecontrol.Options{})
363+
// If there are no errors parsing cache headers and there are no reasons not to cache, then we cache
364+
if err == nil && len(reasons) == 0 {
365+
shouldCache = true
366+
expireTime = resExpireTime
367+
}
368+
369+
documentBody = res.Body
370+
}
371+
if err != nil {
372+
return nil, NewJsonLdError(LoadingDocumentFailed, err)
373+
}
374+
document, err := DocumentFromReader(documentBody)
375+
if err != nil {
376+
return nil, err
377+
}
378+
remoteDoc := &RemoteDocument{DocumentURL: finalURL, Document: document, ContextURL: contextURL}
379+
380+
// If we went down a branch that marked shouldCache true then lets add the cache entry into
381+
// the cache
382+
if shouldCache {
383+
cacheEntry := &cachedRemoteDocument{
384+
remoteDocument: remoteDoc,
385+
expireTime: expireTime,
386+
neverExpires: neverExpires,
387+
}
388+
rcdl.cache[u] = cacheEntry
389+
}
390+
391+
return remoteDoc, nil
392+
}

0 commit comments

Comments
 (0)