@@ -2,6 +2,10 @@
 from scrapy_proxy_headers.agent import ScrapyProxyHeadersAgent

 class HTTP11ProxyDownloadHandler(HTTP11DownloadHandler):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._proxy_headers_by_proxy = {}
+
     def download_request(self, request, spider):
         """Return a deferred for the HTTP download"""
         agent = ScrapyProxyHeadersAgent(
@@ -12,4 +16,22 @@ def download_request(self, request, spider):
             fail_on_dataloss=self._fail_on_dataloss,
             crawler=self._crawler,
         )
-        return agent.download_request(request)
+        response = agent.download_request(request)
+        proxy = request.meta.get("proxy")
+
+        if proxy:
+            # Proxy tunnels can get reused, and when that happens the proxy
+            # headers are not available on subsequent responses. Save the
+            # headers from the first tunnel response, keyed by proxy, so we
+            # can add them to subsequent responses for the same proxy.
+            def callback(response):
+                if hasattr(response, "_proxy_response_headers"):
+                    self._proxy_headers_by_proxy[proxy] = response._proxy_response_headers
+
+                if proxy in self._proxy_headers_by_proxy:
+                    response.headers.update(self._proxy_headers_by_proxy[proxy])
+
+                return response
+
+            response.addCallback(callback)
+        return response
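
For this handler to be used at all, it has to be registered for the http and https schemes via Scrapy's DOWNLOAD_HANDLERS setting. A minimal sketch, assuming the class is importable from a hypothetical scrapy_proxy_headers.handler module (adjust the dotted path to wherever the class actually lives in this package):

    # settings.py
    DOWNLOAD_HANDLERS = {
        "http": "scrapy_proxy_headers.handler.HTTP11ProxyDownloadHandler",
        "https": "scrapy_proxy_headers.handler.HTTP11ProxyDownloadHandler",
    }

With this in place, any request that carries request.meta["proxy"] goes through the download_request above, and the cached proxy headers are merged into response.headers.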
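
Because the callback merges the cached proxy headers into the regular response.headers mapping, a spider can read them like any other header, even on responses served over a reused tunnel. A sketch with a hypothetical X-Proxy-Session header and proxy endpoint (real header names and endpoints depend on the proxy provider):

    import scrapy

    class ProxySpider(scrapy.Spider):
        name = "proxy_spider"
        start_urls = ["https://example.com"]

        def start_requests(self):
            for url in self.start_urls:
                # The proxy URL is a placeholder; use your own endpoint.
                yield scrapy.Request(url, meta={"proxy": "http://proxy.example:8080"})

        def parse(self, response):
            # Headers set by the proxy on the first tunnel response are
            # merged into response.headers by the download handler.
            session = response.headers.get("X-Proxy-Session")
            self.logger.info("proxy session header: %r", session)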