File tree Expand file tree Collapse file tree 3 files changed +15
-1
lines changed
webmagic-core/src/main/java/us/codecraft/webmagic/selector Expand file tree Collapse file tree 3 files changed +15
-1
lines changed Original file line number Diff line number Diff line change @@ -31,6 +31,11 @@ public Selectable smartContent() {
3131 return select (smartContentSelector , getSourceTexts ());
3232 }
3333
34+ public Selectable smartContent (int threshold ) {
35+ SmartContentSelector smartContentSelector = Selectors .smartContent (threshold );
36+ return select (smartContentSelector , getSourceTexts ());
37+ }
38+
3439 @ Override
3540 public Selectable links () {
3641 return selectElements (new LinksSelector ());
Original file line number Diff line number Diff line change @@ -20,6 +20,10 @@ public static SmartContentSelector smartContent() {
2020 return new SmartContentSelector ();
2121 }
2222
23+ public static SmartContentSelector smartContent (int threshold ) {
24+ return new SmartContentSelector (threshold );
25+ }
26+
2327 public static CssSelector $ (String expr ) {
2428 return new CssSelector (expr );
2529 }
Original file line number Diff line number Diff line change 1616@ Experimental
1717public class SmartContentSelector implements Selector {
1818
19+ private int threshold = 86 ;
20+
/** Creates a selector that keeps the {@code threshold} field at its declared initial value (86). */
1921 public SmartContentSelector () {
2022 }
2123
/**
 * Creates a selector whose {@code threshold} field is set to the supplied value
 * instead of the field's declared initial value.
 *
 * @param threshold value stored in the {@code threshold} field.
 *                  NOTE(review): presumably consumed by {@code select(String)},
 *                  which previously declared a local of the same name; its exact
 *                  use is not visible here, so confirm before documenting valid ranges.
 */
24+ public SmartContentSelector (int threshold ) {
25+ this .threshold = threshold ;
26+ }
27+
2228 @ Override
2329 public String select (String html ) {
2430 html = html .replaceAll ("(?is)<!DOCTYPE.*?>" , "" );
@@ -29,7 +35,6 @@ public String select(String html) {
2935 html = html .replaceAll ("(?is)<.*?>" , "" );
3036 List <String > lines ;
3137 int blocksWidth =3 ;
32- int threshold =86 ;
3338 int start ;
3439 int end ;
3540 StringBuilder text = new StringBuilder ();
You can’t perform that action at this time.
0 commit comments