Skip to content

Commit 408f6d1

Browse files
committed
Support :skyscraper/description
To facilitate debugging, processors can now set the :skyscraper/description key on contexts. These descriptions will be logged when downloading, instead of the URL. This is a one-off key, meaning that it will not be propagated to the scraping subtree. This commit also makes :skyscraper.traverse/priority one-off.
1 parent d31d23a commit 408f6d1

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

src/skyscraper/context.clj

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
(defn dissoc-internal
44
"Dissocs the context keys that shouldn't be carried over to further processing."
55
[context]
6-
(let [removed-keys #{:processor :url :skyscraper.core/new-items :skyscraper.core/retry}]
6+
(let [removed-keys #{:processor :url :skyscraper.core/new-items :skyscraper.core/retry
7+
:skyscraper/description :skyscraper.traverse/priority}]
78
(into {}
89
(remove (fn [[k _]] (or (contains? removed-keys k)
910
(and (keyword? k)
@@ -24,6 +25,8 @@
2425
:skyscraper.core/stage
2526
:skyscraper.traverse/handler
2627
:skyscraper.traverse/call-protocol
28+
:skyscraper.traverse/priority
29+
:skyscraper/description
2730
:http/cookies))
2831

2932
(defn describe
@@ -38,3 +41,8 @@
3841
true (merge (select-keys context [:processor :url])) ; reattach
3942
processor (assoc :skyscraper.core/current-processor-name (:name processor))
4043
true pr-str)))
44+
45+
(defn describe-url
46+
[context]
47+
(or (:skyscraper/description context)
48+
(:url context)))

src/skyscraper/core.clj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@
378378
(let [req (merge {:method :get, :url (:url context)}
379379
(extract-namespaced-keys "http" context))
380380
success-fn (fn [response]
381-
(debugf "[download] Downloaded %s" (:url context))
381+
(debugf "[download] Downloaded %s" (context/describe-url context))
382382
(.release download-semaphore)
383383
(callback (respond-with response options context)))
384384
error-fn (fn [error]
@@ -387,7 +387,7 @@
387387
(callback (error-handler error options context))))]
388388
(debugf "[download] Waiting")
389389
(.acquire download-semaphore)
390-
(infof "[download] Downloading %s" (:url context))
390+
(infof "[download] Downloading %s" (context/describe-url context))
391391
(let [req (merge {:async? true,
392392
:connection-manager connection-manager}
393393
req (get-option context options :http-options))
@@ -407,10 +407,10 @@
407407
request-fn (or (:request-fn options)
408408
http/request)]
409409
(try
410-
(infof "[download] Downloading %s" (:url context))
410+
(infof "[download] Downloading %s" (context/describe-url context))
411411
(wait sleep)
412412
(let [resp (request-fn req)]
413-
(debugf "[download] Downloaded %s" (:url context))
413+
(debugf "[download] Downloaded %s" (context/describe-url context))
414414
[(cond-> (advance-pipeline pipeline context)
415415
true (assoc ::response resp)
416416
(:cookies resp) (update :http/cookies merge (:cookies resp)))])

0 commit comments

Comments
 (0)