1111import org .junit .Ignore ;
1212import org .junit .Test ;
1313
14+ import us .codecraft .webmagic .Page ;
15+ import us .codecraft .webmagic .Spider ;
16+ import us .codecraft .webmagic .processor .PageProcessor ;
1417import us .codecraft .xsoup .XPathEvaluator ;
1518import us .codecraft .xsoup .Xsoup ;
1619
@@ -1385,35 +1388,52 @@ public void testXpath2Selector() {
13851388 Assert .assertEquals ("http://www.oschina.net/" , selectList .get (0 ));
13861389 }
13871390
1391+ @ Ignore ("test parse <table> <tr> <td> tag" )
1392+ @ Test
1393+ public void htmlCleanerParseTest () {
1394+ Spider .create (new RuoxiaPageProcessor ()).addUrl ("http://www.ruoxia.com/top/dianji/month" ).thread (1 ).run ();
1395+ }
1396+
1397+ class RuoxiaPageProcessor implements PageProcessor {
1398+ @ Override
1399+ public void process (Page page ) {
1400+ List <String > items = new Xpath2Selector ("//div[@class=\" bd\" ]//tbody/tr" ).selectList (page .getRawText ());
1401+ for (String item : items ) {
1402+ String name = new Xpath2Selector ("//td[3]/div/a[1]/text()" ).select (item );
1403+ System .out .println (name );
1404+ }
1405+ }
1406+ }
1407+
13881408 @ Ignore ("take long time" )
13891409 @ Test
13901410 public void performanceTest () {
13911411 Xpath2Selector xpath2Selector = new Xpath2Selector ("//a" );
1392- long time =System .currentTimeMillis ();
1412+ long time = System .currentTimeMillis ();
13931413 for (int i = 0 ; i < 1000 ; i ++) {
13941414 xpath2Selector .selectList (html );
13951415 }
1396- System .out .println (System .currentTimeMillis ()- time );
1416+ System .out .println (System .currentTimeMillis () - time );
13971417
13981418 XpathSelector xpathSelector = new XpathSelector ("//a" );
1399- time =System .currentTimeMillis ();
1419+ time = System .currentTimeMillis ();
14001420 for (int i = 0 ; i < 1000 ; i ++) {
14011421 xpathSelector .selectList (html );
14021422 }
1403- System .out .println (System .currentTimeMillis ()- time );
1423+ System .out .println (System .currentTimeMillis () - time );
14041424
1405- time =System .currentTimeMillis ();
1425+ time = System .currentTimeMillis ();
14061426 for (int i = 0 ; i < 1000 ; i ++) {
14071427 xpath2Selector .selectList (html );
14081428 }
14091429 System .out .println (System .currentTimeMillis () - time );
14101430
14111431 CssSelector cssSelector = new CssSelector ("a" );
1412- time =System .currentTimeMillis ();
1432+ time = System .currentTimeMillis ();
14131433 for (int i = 0 ; i < 1000 ; i ++) {
14141434 cssSelector .selectList (html );
14151435 }
1416- System .out .println ("css " + (System .currentTimeMillis ()- time ));
1436+ System .out .println ("css " + (System .currentTimeMillis () - time ));
14171437 }
14181438
14191439 @ Ignore ("take long time" )
@@ -1425,54 +1445,54 @@ public void parserPerformanceTest() throws XPatherException {
14251445 TagNode tagNode = htmlCleaner .clean (html );
14261446 Document document = Jsoup .parse (html );
14271447
1428- long time =System .currentTimeMillis ();
1448+ long time = System .currentTimeMillis ();
14291449 for (int i = 0 ; i < 2000 ; i ++) {
14301450 htmlCleaner .clean (html );
14311451 }
1432- System .out .println (System .currentTimeMillis ()- time );
1452+ System .out .println (System .currentTimeMillis () - time );
14331453
1434- time =System .currentTimeMillis ();
1454+ time = System .currentTimeMillis ();
14351455 for (int i = 0 ; i < 2000 ; i ++) {
14361456 tagNode .evaluateXPath ("//a" );
14371457 }
1438- System .out .println (System .currentTimeMillis ()- time );
1458+ System .out .println (System .currentTimeMillis () - time );
14391459
14401460 System .out .println ("=============" );
14411461
1442- time =System .currentTimeMillis ();
1462+ time = System .currentTimeMillis ();
14431463 for (int i = 0 ; i < 2000 ; i ++) {
14441464 Jsoup .parse (html );
14451465 }
1446- System .out .println (System .currentTimeMillis ()- time );
1466+ System .out .println (System .currentTimeMillis () - time );
14471467
1448- time =System .currentTimeMillis ();
1468+ time = System .currentTimeMillis ();
14491469 for (int i = 0 ; i < 2000 ; i ++) {
14501470 document .select ("a" );
14511471 }
1452- System .out .println (System .currentTimeMillis ()- time );
1472+ System .out .println (System .currentTimeMillis () - time );
14531473
14541474 System .out .println ("=============" );
14551475
1456- time =System .currentTimeMillis ();
1476+ time = System .currentTimeMillis ();
14571477 for (int i = 0 ; i < 2000 ; i ++) {
14581478 htmlCleaner .clean (html );
14591479 }
1460- System .out .println (System .currentTimeMillis ()- time );
1480+ System .out .println (System .currentTimeMillis () - time );
14611481
1462- time =System .currentTimeMillis ();
1482+ time = System .currentTimeMillis ();
14631483 for (int i = 0 ; i < 2000 ; i ++) {
14641484 tagNode .evaluateXPath ("//a" );
14651485 }
1466- System .out .println (System .currentTimeMillis ()- time );
1486+ System .out .println (System .currentTimeMillis () - time );
14671487
14681488 System .out .println ("=============" );
14691489
14701490 XPathEvaluator compile = Xsoup .compile ("//a" );
1471- time =System .currentTimeMillis ();
1491+ time = System .currentTimeMillis ();
14721492 for (int i = 0 ; i < 2000 ; i ++) {
14731493 compile .evaluate (document );
14741494 }
1475- System .out .println (System .currentTimeMillis ()- time );
1495+ System .out .println (System .currentTimeMillis () - time );
14761496
14771497 }
14781498
0 commit comments