@@ -6,15 +6,15 @@ use url::Url;
 const AGENT: &'static str = "test_robotparser";
 
 fn robot_test(doc: &str, good_urls: Vec<&str>, bad_urls: Vec<&str>, agent: &str) {
-    let url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let url = Url::parse("https://www.baidu.com/robots.txt").unwrap();
     let parser = parse_robots_txt(url.origin(), doc).get_result();
     for url in &good_urls {
-        let url = format!("http://www.baidu.com{}", url);
+        let url = format!("https://www.baidu.com{}", url);
         let url = Url::parse(&url).unwrap();
         assert!(parser.can_fetch(agent, &url));
     }
     for url in &bad_urls {
-        let url = format!("http://www.baidu.com{}", url);
+        let url = format!("https://www.baidu.com{}", url);
         let url = Url::parse(&url).unwrap();
         assert!(!parser.can_fetch(agent, &url));
     }
@@ -56,7 +56,7 @@ fn test_robots_txt_1() {
 #[test]
 fn test_robots_txt_2() {
     let doc = "\n\
-    # robots.txt for http://www.example.com/\n\
+    # robots.txt for https://www.example.com/\n\
     \n\
     User-agent: *\n\
     Disallow: /cyberworld/map/ # This is an infinite virtual URL space\n\
@@ -249,7 +249,7 @@ fn test_robots_txt_read() {
     use robotparser::http::{CreateRobotsTxtRequest, ParseRobotsTxtResponse};
     use reqwest::{Client, Request};
     let http_client = Client::new();
-    let url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let request = Request::create_robots_txt_request(url.origin());
     let mut response = http_client.execute(request).unwrap();
     let parser = response.parse_robots_txt_response().unwrap().get_result();
@@ -258,7 +258,7 @@ fn test_robots_txt_read() {
 
 #[test]
 fn test_robots_text_crawl_delay() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc = "User-agent: Yandex\n\
     Crawl-delay: 2.35\n\
     Disallow: /search/\n";
@@ -268,26 +268,26 @@ fn test_robots_text_crawl_delay() {
 
 #[test]
 fn test_robots_text_sitemaps() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc = "User-agent: Yandex\n\
-    Sitemap \t : http://example.com/sitemap1.xml\n
-    Sitemap: http://example.com/sitemap2.xml\n
-    Sitemap: http://example.com/sitemap3.xml\n
+    Sitemap \t : https://example.com/sitemap1.xml\n
+    Sitemap: https://example.com/sitemap2.xml\n
+    Sitemap: https://example.com/sitemap3.xml\n
     Disallow: /search/\n";
     let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result();
     assert_eq!(
         &[
-            Url::parse("http://example.com/sitemap1.xml").unwrap(),
-            Url::parse("http://example.com/sitemap2.xml").unwrap(),
-            Url::parse("http://example.com/sitemap3.xml").unwrap()
+            Url::parse("https://example.com/sitemap1.xml").unwrap(),
+            Url::parse("https://example.com/sitemap2.xml").unwrap(),
+            Url::parse("https://example.com/sitemap3.xml").unwrap()
         ],
         parser.get_sitemaps()
     );
 }
 
 #[test]
 fn test_robots_text_request_rate() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc =
         "User-agent: Yandex\n\
     Request-rate: 3/15\n\
@@ -313,15 +313,15 @@ Clean-param: gid\n\
     Clean-param: tm\n\
     Clean-param: amp\n\
     ";
-    let url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let url = Url::parse("https://www.baidu.com/robots.txt").unwrap();
     let parser = parse_robots_txt(url.origin(), doc).get_result();
-    let mut site_url = Url::parse("http://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
+    let mut site_url = Url::parse("https://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
     let was_updated = parser.normalize_url(&mut site_url);
     assert_eq!(was_updated, true);
-    assert_eq!(site_url.as_str(), "http://www.baidu.com/test?post_id=7777");
+    assert_eq!(site_url.as_str(), "https://www.baidu.com/test?post_id=7777");
 
-    let mut site_url = Url::parse("http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
+    let mut site_url = Url::parse("https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
     let was_updated = parser.normalize_url(&mut site_url);
     assert_eq!(was_updated, false);
-    assert_eq!(site_url.as_str(), "http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1");
+    assert_eq!(site_url.as_str(), "https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1");
 }