@@ -102,7 +102,7 @@ public void test02() throws Exception {
102102 }
103103
104104 @ Test
105- public void test03 () throws Exception {
105+ public void test03 () throws Exception {
106106 String fileName = "D:\\ Program Files (x86)\\ Reader_v1.9.3.2\\ 天命大反派.txt" ;
107107 if (!FileUtil .exist (fileName )) {
108108 File file = new File (fileName );
@@ -134,18 +134,19 @@ public void test03() throws Exception {
134134 WebDriver chromeDriver = new ChromeDriver (chromeOptions );
135135
136136 TestSelenium .text (chromeDriver , "https://www.xs123.org/xs/33/33112/21404025.html" , fileName );
137+ // TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70349769.html", fileName);
137138 // TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70963309.html", fileName);
138139
139140 chromeDriver .close ();
140141 }
141142
142- public static void text (WebDriver chromeDriver , String url , String fileName ) {
143+ public static void text (WebDriver chromeDriver , String url , String fileName ) throws Exception {
143144 chromeDriver .get (url );
145+ Thread .sleep (100 );
144146 WebElement boxWebElement = chromeDriver .findElement (By .className ("box_con" ));
145147 WebElement titleElement = boxWebElement .findElement (By .tagName ("h1" ));
146148
147- // System.out.println(titleElement.getText());
148- if (titleElement .getText ().contains ("950" )) {
149+ if (titleElement .getText ().contains ("1030" )) {
149150 return ;
150151 }
151152 if ("玄幻:我!天命大反派" .equals (titleElement .getText ())) {
@@ -154,17 +155,43 @@ public static void text(WebDriver chromeDriver, String url, String fileName) {
154155
155156 // 标题
156157 List <String > lines = new ArrayList <>();
157- lines .add ("第" + titleElement .getText ().substring (0 ,4 ) + "章 " + titleElement .getText ().substring (4 ));
158- lines .add ("" );
158+ String title = "第" + titleElement .getText ().substring (0 ,4 ).trim () + "章 " + titleElement .getText ().substring (4 ).trim ();
159+ title .replaceAll ("/?" , "" );
160+ title .replaceAll (":" , "" );
161+ System .out .println (title );
162+ lines .add (title );
159163
160164 // 正文
161165 WebElement conWebElement = chromeDriver .findElement (By .id ("content" ));
162166 String con = conWebElement .getText ();
163- // con = con.replaceAll("<br/>", "/r/n");
164- lines .add (con );
167+ con = con .replaceAll ("<br/>" , "" );
168+ con = con .replaceAll ("\n " , "" );
169+ con = con .replaceAll ("," , "," );
170+ String [] conArray = con .split ("。" );
171+ for (String text : conArray ) {
172+ if (text .length () > 50 ) {
173+ String [] conArray2 = text .split ("," );
174+ for (int i = 0 ; i < conArray2 .length ; i ++) {
175+ if (i + 2 < conArray2 .length ) {
176+ lines .add (conArray2 [i ] + "," + conArray2 [++i ] + "," + conArray2 [++i ] + "," );
177+ } else if (i + 1 < conArray2 .length ) {
178+ lines .add (conArray2 [i ] + "," + conArray2 [++i ] + "," );
179+ } else {
180+ lines .add (conArray2 [i ] + "。" );
181+ }
182+ lines .add ("" );
183+ }
184+ lines .add ("" );
185+ } else {
186+ lines .add (text + "。" );
187+ lines .add ("" );
188+ }
189+ }
165190
166191 FileUtil .appendUtf8Lines (lines , fileName );
167192
193+ Thread .sleep (10 );
194+
168195 WebElement btnWebElement = chromeDriver .findElement (By .className ("bottem2" ));
169196 List <WebElement > btnListWebElement = btnWebElement .findElements (By .tagName ("a" ));
170197
0 commit comments