1+ use reqwest:: { Client , Request } ;
2+ use reqwest:: { Method , Error } ;
3+ use reqwest:: header:: HeaderValue ;
4+ use url:: { Origin , Url } ;
5+ use reqwest:: header:: USER_AGENT ;
6+ use crate :: http:: { RobotsTxtClient , DEFAULT_USER_AGENT } ;
7+ use crate :: parser:: { ParseResult , parse_fetched_robots_txt} ;
8+ use crate :: model:: FetchedRobotsTxt ;
9+ use std:: pin:: Pin ;
10+ use futures:: task:: { Context , Poll } ;
11+ use futures:: Future ;
12+ use futures:: future:: TryFutureExt ;
13+ use futures:: future:: ok as future_ok;
14+
15+ type FetchFuture = Box < dyn Future < Output =Result < ( ResponseInfo , String ) , Error > > > ;
16+
17+ impl RobotsTxtClient for Client {
18+ type Result = RobotsTxtResponse ;
19+ fn fetch_robots_txt ( & self , origin : Origin ) -> Self :: Result {
20+ let url = format ! ( "{}/robots.txt" , origin. unicode_serialization( ) ) ;
21+ let url = Url :: parse ( & url) . expect ( "Unable to parse robots.txt url" ) ;
22+ let mut request = Request :: new ( Method :: GET , url) ;
23+ let _ = request. headers_mut ( ) . insert ( USER_AGENT , HeaderValue :: from_static ( DEFAULT_USER_AGENT ) ) ;
24+ let response = self
25+ . execute ( request)
26+ . and_then ( |response| {
27+ let response_info = ResponseInfo { status_code : response. status ( ) . as_u16 ( ) } ;
28+ return response. text ( ) . and_then ( |response_text| {
29+ return future_ok ( ( response_info, response_text) ) ;
30+ } ) ;
31+ } ) ;
32+ let response: Pin < Box < dyn Future < Output =Result < ( ResponseInfo , String ) , Error > > > > = Box :: pin ( response) ;
33+ return RobotsTxtResponse {
34+ origin,
35+ response,
36+ }
37+ }
38+ }
39+
/// Metadata of an HTTP response that is kept after the body has been consumed.
struct ResponseInfo {
    /// Numeric HTTP status code of the response.
    status_code: u16,
}
43+
44+ /// Future for fetching robots.txt result.
45+ pub struct RobotsTxtResponse {
46+ origin : Origin ,
47+ response : Pin < FetchFuture > ,
48+ }
49+
50+ impl RobotsTxtResponse {
51+ /// Returns origin of robots.txt
52+ pub fn get_origin ( & self ) -> & Origin {
53+ return & self . origin ;
54+ }
55+ }
56+
57+ impl Future for RobotsTxtResponse {
58+ type Output = Result < ParseResult < FetchedRobotsTxt > , Error > ;
59+
60+ fn poll ( self : Pin < & mut Self > , cx : & mut Context ) -> Poll < Self :: Output > {
61+ let self_mut = self . get_mut ( ) ;
62+ let response_pin = self_mut. response . as_mut ( ) ;
63+ match response_pin. poll ( cx) {
64+ Poll :: Ready ( Ok ( ( response_info, text) ) ) => {
65+ let robots_txt = parse_fetched_robots_txt ( self_mut. origin . clone ( ) , response_info. status_code , & text) ;
66+ return Poll :: Ready ( Ok ( robots_txt) ) ;
67+ } ,
68+ Poll :: Ready ( Err ( error) ) => {
69+ return Poll :: Ready ( Err ( error) ) ;
70+ } ,
71+ Poll :: Pending => {
72+ return Poll :: Pending ;
73+ } ,
74+ }
75+ }
76+ }
0 commit comments