diff --git a/src/clj_http/client.clj b/src/clj_http/client.clj index 7d01d886..52e31ffc 100644 --- a/src/clj_http/client.clj +++ b/src/clj_http/client.clj @@ -386,14 +386,12 @@ [resp] (-> resp (update :body util/gunzip) - (assoc :orig-content-encoding (get-in resp [:headers "content-encoding"])) (dissoc-in [:headers "content-encoding"]))) (defmethod decompress-body "deflate" [resp] (-> resp (update :body util/inflate) - (assoc :orig-content-encoding (get-in resp [:headers "content-encoding"])) (dissoc-in [:headers "content-encoding"]))) (defmethod decompress-body :default [resp] @@ -412,7 +410,34 @@ [req resp] (if (false? (opt req :decompress-body)) resp - (decompress-body resp))) + ;; If we are decompressing the body there might be multiple content + ;; encodings. + ;; https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Content-Encoding + ;; Take each content encoding (in reverse order) and try to decompress the + ;; body using that content encoding recursively. This way, if content is + ;; compressed twice (once with zlib and again with gzip), we can + ;; properly decompress it in the correct order. + (loop [[encoding & rest-encodings] (some-> resp + (get-in [:headers "content-encoding"]) + (str/split #",") + (reverse)) + ;; Store the original content encoding somewhere so it can be + ;; referenced if needed. + resp (assoc resp + :orig-content-encoding + (get-in resp [:headers "content-encoding"]))] + (if encoding + ;; If there is an encoding then decompress the body using that encoding + ;; then recur with the remaining encodings. + (recur + rest-encodings + (decompress-body + ;; Force the content encoding to be a single encoding type for this. + (assoc-in resp + [:headers "content-encoding"] + (str/trim encoding)))) + ;; If there are no encodings left then we are done. + resp)))) (defn wrap-decompression "Middleware handling automatic decompression of responses from web servers. 
If diff --git a/test/clj_http/test/client_test.clj b/test/clj_http/test/client_test.clj index 1b111e99..a179a8e0 100644 --- a/test/clj_http/test/client_test.clj +++ b/test/clj_http/test/client_test.clj @@ -694,6 +694,20 @@ (is (= "gzip" (:orig-content-encoding resp))) (is (= nil (get-in resp [:headers "content-encoding"]))))) +(deftest apply-on-compressed-layered + (let [client (fn [req] + (is (= "gzip, deflate" + (get-in req [:headers "accept-encoding"]))) + {:body (util/gzip (util/deflate (util/utf8-bytes "foofoofoo"))) + :headers {"content-encoding" "deflate, gzip"}}) + c-client (client/wrap-decompression client) + resp (c-client {})] + ;; The response body here is going to be an input stream because of the + ;; nesting. So we need to slurp it, or force it to be a string of some sort. + (is (= "foofoofoo" (slurp (:body resp)))) + (is (= "deflate, gzip" (:orig-content-encoding resp))) + (is (= nil (get-in resp [:headers "content-encoding"]))))) + (deftest apply-on-compressed-async (let [client (fn [req respond raise] (is (= "gzip, deflate"