Skip to content

Commit c489647

Browse files
committed
Revert " Downloader 提供刷新组件的api,方便在spider中操作"
This reverts commit 2e2a0fd.
1 parent 4bedd97 commit c489647

File tree

11 files changed

+10
-73
lines changed

11 files changed

+10
-73
lines changed

webmagic-core/src/main/java/us/codecraft/webmagic/Site.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,15 @@ public class Site {
4040

4141
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
4242

43-
private static final Set<Integer> DEFAULT_REFRESH_CODE_SET = new HashSet<>();
44-
45-
private Set<Integer> refreshCode = DEFAULT_REFRESH_CODE_SET;
4643
private Set<Integer> acceptStatCode = DEFAULT_STATUS_CODE_SET;
4744

48-
4945
private Map<String, String> headers = new HashMap<String, String>();
5046

5147
private boolean useGzip = true;
5248

5349
private boolean disableCookieManagement = false;
5450

5551
static {
56-
DEFAULT_REFRESH_CODE_SET.add(HttpConstant.StatusCode.FORBIDDEN);
5752
DEFAULT_STATUS_CODE_SET.add(HttpConstant.StatusCode.CODE_200);
5853
}
5954

@@ -202,15 +197,6 @@ public Site setAcceptStatCode(Set<Integer> acceptStatCode) {
202197
return this;
203198
}
204199

205-
public Site setRefreshCode(Set<Integer> refreshCode){
206-
this.refreshCode = refreshCode;
207-
return this;
208-
}
209-
public Set<Integer> getRefreshCode(){
210-
return refreshCode;
211-
212-
}
213-
214200
/**
215201
* get acceptStatCode
216202
*

webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,10 +424,7 @@ private void onDownloadSuccess(Request request, Page page) {
424424
pipeline.process(page.getResultItems(), this);
425425
}
426426
}
427-
} else if(site.getRefreshCode().contains(page.getStatusCode())) {
428-
logger.info("page status code error, page {} , code: {}, start refresh downloader", request.getUrl(), page.getStatusCode());
429-
downloader.refreshComponent(this);
430-
}else {
427+
} else {
431428
logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
432429
}
433430
sleep(site.getSleepTime());

webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Downloader.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,14 @@ public interface Downloader {
1818
* Downloads web pages and store in Page object.
1919
*
2020
* @param request request
21-
* @param task task
21+
* @param task task
2222
* @return page
2323
*/
24-
Page download(Request request, Task task);
24+
public Page download(Request request, Task task);
2525

2626
/**
2727
* Tell the downloader how many threads the spider used.
28-
*
2928
* @param threadNum number of threads
3029
*/
31-
void setThread(int threadNum);
32-
33-
34-
void refreshComponent(Task task);
30+
public void setThread(int threadNum);
3531
}

webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,6 @@ public Page download(Request request, Task task) {
111111
}
112112
}
113113

114-
115-
@Override
116-
public void refreshComponent(Task task) {
117-
if (proxyProvider != null ) {
118-
proxyProvider.refreshProxy(task);
119-
}
120-
121-
httpClients.remove(task.getSite().getDomain());
122-
123-
}
124-
125114
@Override
126115
public void setThread(int thread) {
127116
httpClientGenerator.setPoolSize(thread);

webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
package us.codecraft.webmagic.downloader;
22

3-
import java.io.File;
43
import java.io.IOException;
54
import java.security.KeyManagementException;
6-
import java.security.KeyStore;
7-
import java.security.KeyStoreException;
85
import java.security.NoSuchAlgorithmException;
96
import java.security.cert.CertificateException;
107
import java.security.cert.X509Certificate;
118
import java.util.Map;
129

1310
import javax.net.ssl.SSLContext;
14-
import javax.net.ssl.SSLContextSpi;
1511
import javax.net.ssl.TrustManager;
1612
import javax.net.ssl.X509TrustManager;
1713

@@ -28,7 +24,6 @@
2824
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
2925
import org.apache.http.conn.ssl.DefaultHostnameVerifier;
3026
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
31-
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
3227
import org.apache.http.impl.client.BasicCookieStore;
3328
import org.apache.http.impl.client.CloseableHttpClient;
3429
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
@@ -37,7 +32,6 @@
3732
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
3833
import org.apache.http.impl.cookie.BasicClientCookie;
3934
import org.apache.http.protocol.HttpContext;
40-
import org.apache.http.ssl.SSLContexts;
4135
import org.slf4j.Logger;
4236
import org.slf4j.LoggerFactory;
4337

@@ -75,16 +69,16 @@ private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() {
7569
return new SSLConnectionSocketFactory(sslContext, supportedProtocols,
7670
null,
7771
new DefaultHostnameVerifier()); // 优先绕过安全证书
78-
} catch (KeyManagementException | CertificateException | KeyStoreException | IOException e) {
72+
} catch (KeyManagementException e) {
7973
logger.error("ssl connection fail", e);
8074
} catch (NoSuchAlgorithmException e) {
8175
logger.error("ssl connection fail", e);
8276
}
8377
return SSLConnectionSocketFactory.getSocketFactory();
8478
}
8579

86-
private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException, CertificateException, KeyStoreException, IOException {
87-
// 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法
80+
private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
81+
// 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法
8882
X509TrustManager trustManager = new X509TrustManager() {
8983

9084
@Override
@@ -102,10 +96,10 @@ public X509Certificate[] getAcceptedIssuers() {
10296

10397
};
10498

105-
SSLContext sc = SSLContext.getInstance("SSLv3");
99+
SSLContext sc = SSLContext.getInstance("TLS");
106100
sc.init(null, new TrustManager[] { trustManager }, null);
107101
return sc;
108-
}
102+
}
109103

110104
public HttpClientGenerator setPoolSize(int poolSize) {
111105
connectionManager.setMaxTotal(poolSize);

webmagic-core/src/main/java/us/codecraft/webmagic/utils/HttpConstant.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ public static abstract class Method {
2828
public static abstract class StatusCode {
2929

3030
public static final int CODE_200 = 200;
31-
public static final int FORBIDDEN = 403;
3231

3332
}
3433

webmagic-core/src/test/java/us/codecraft/webmagic/SpiderTest.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,6 @@ public Site getSite() {
5757
return Site.me().setSleepTime(0);
5858
}
5959
}).setDownloader(new Downloader() {
60-
@Override
61-
public void refreshComponent(Task task) {
62-
63-
}
64-
6560
@Override
6661
public Page download(Request request, Task task) {
6762
return new Page().setRawText("");

webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@ public Page download(Request request, Task task) {
2828
return page;
2929
}
3030

31-
@Override
32-
public void refreshComponent(Task task) {
33-
34-
}
35-
3631
@Override
3732
public void setThread(int threadNum) {
3833
}

webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,7 @@ public PhantomJSDownloader(String phantomJsCommand) {
4242
this.initPhantomjsCrawlPath();
4343
PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
4444
}
45-
46-
@Override
47-
public void refreshComponent(Task task) {
48-
49-
}
50-
45+
5146
/**
5247
* 新增构造函数,支持crawl.js路径自定义,因为当其他项目依赖此jar包时,runtime.exec()执行phantomjs命令时无法使用jar包中的crawl.js
5348
* <pre>

webmagic-extension/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,6 @@
99
* @author code4crafter@gmail.com
1010
*/
1111
public class MockGithubDownloader implements Downloader{
12-
@Override
13-
public void refreshComponent(Task task) {
14-
15-
}
1612

1713
private String html = "\n" +
1814
"\n" +

0 commit comments

Comments
 (0)