Skip to content

Commit 3f756c9

Browse files
committed
Revert " 代理功能扩展,对原代理提供商进行拆分,加入lombok"
This reverts commit 33906e3.
1 parent aabc558 commit 3f756c9

File tree

10 files changed

+82
-272
lines changed

10 files changed

+82
-272
lines changed

webmagic-core/pom.xml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3-
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
43
<parent>
54
<groupId>us.codecraft</groupId>
65
<artifactId>webmagic-parent</artifactId>
@@ -25,12 +24,6 @@
2524
<groupId>org.apache.commons</groupId>
2625
<artifactId>commons-lang3</artifactId>
2726
</dependency>
28-
<dependency>
29-
<groupId>org.projectlombok</groupId>
30-
<artifactId>lombok</artifactId>
31-
<version>1.18.10</version>
32-
<scope>provided</scope>
33-
</dependency>
3427

3528
<dependency>
3629
<groupId>us.codecraft</groupId>

webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,6 @@ private void onDownloadSuccess(Request request, Page page) {
426426
}
427427
} else if(site.getRefreshCode().contains(page.getStatusCode())) {
428428
logger.info("page status code error, page {} , code: {}, start refresh downloader", request.getUrl(), page.getStatusCode());
429-
downloader.refreshComponent(this);
430429
failHandler(request);
431430
}else {
432431
logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
@@ -440,6 +439,7 @@ private void onDownloaderFail(Request request) {
440439
}
441440

442441
private void failHandler(Request request){
442+
downloader.refreshComponent(this);
443443
if (site.getCycleRetryTimes() == 0) {
444444
sleep(site.getSleepTime());
445445
} else {

webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
import us.codecraft.webmagic.Task;
1414
import us.codecraft.webmagic.proxy.Proxy;
1515
import us.codecraft.webmagic.proxy.ProxyProvider;
16-
import us.codecraft.webmagic.proxy.RefreshableProxyProvider;
17-
import us.codecraft.webmagic.proxy.ReturnableProxyProvider;
1816
import us.codecraft.webmagic.selector.PlainText;
1917
import us.codecraft.webmagic.utils.CharsetUtils;
2018
import us.codecraft.webmagic.utils.HttpClientUtils;
@@ -95,8 +93,8 @@ public Page download(Request request, Task task) {
9593
} catch (IOException e) {
9694
logger.warn("download page {} error", request.getUrl(), e);
9795
onError(request, e, proxyProvider);
98-
if (proxyProvider != null && proxy != null && proxyProvider instanceof RefreshableProxyProvider && refreshProxyOnError.test(e)) {
99-
((RefreshableProxyProvider)proxyProvider).refreshProxy(task,proxy);
96+
if (proxyProvider != null && refreshProxyOnError.test(e)) {
97+
proxyProvider.refreshProxy(task,proxy);
10098
}
10199
if(refreshClientOnError.test(e)) {
102100
httpClients.remove(task.getSite().getDomain());
@@ -107,18 +105,17 @@ public Page download(Request request, Task task) {
107105
//ensure the connection is released back to pool
108106
EntityUtils.consumeQuietly(httpResponse.getEntity());
109107
}
110-
if (proxyProvider != null && proxy != null && proxyProvider instanceof ReturnableProxyProvider) {
111-
((ReturnableProxyProvider) proxyProvider).returnProxy(proxy, page, task);
112-
108+
if (proxyProvider != null && proxy != null) {
109+
proxyProvider.returnProxy(proxy, page, task);
113110
}
114111
}
115112
}
116113

117114

118115
@Override
119116
public void refreshComponent(Task task) {
120-
if (proxyProvider != null && proxyProvider instanceof RefreshableProxyProvider) {
121-
((RefreshableProxyProvider) proxyProvider).refreshProxy(task, ((RefreshableProxyProvider) proxyProvider).getCurrentProxy(task));
117+
if (proxyProvider != null ) {
118+
proxyProvider.refreshProxy(task,proxyProvider.getCurrentProxy(task));
122119
}
123120

124121
httpClients.remove(task.getSite().getDomain());

webmagic-core/src/main/java/us/codecraft/webmagic/proxy/AbstractRefreshableProxyProvider.java

Lines changed: 0 additions & 135 deletions
This file was deleted.

webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ExpirableProxy.java

Lines changed: 0 additions & 34 deletions
This file was deleted.

webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7,68 +7,67 @@
77
import java.nio.charset.StandardCharsets;
88

99
import org.apache.commons.lang3.StringUtils;
10-
import org.apache.http.annotation.Contract;
11-
import org.apache.http.annotation.ThreadingBehavior;
1210

13-
@Contract(threading = ThreadingBehavior.IMMUTABLE)
1411
public class Proxy {
1512

16-
private final String scheme;
13+
private String scheme;
1714

18-
private final String host;
15+
private String host;
1916

20-
private final int port;
17+
private int port;
2118

22-
private final String username;
19+
private String username;
2320

24-
private final String password;
21+
private String password;
2522

26-
public Proxy(String host, int port, String scheme, String username, String password) {
27-
this.scheme = scheme;
28-
this.host = host;
29-
this.port = port;
30-
this.username = username;
31-
this.password = password;
23+
public static Proxy create(final URI uri) {
24+
Proxy proxy = new Proxy(uri.getHost(), uri.getPort(), uri.getScheme());
25+
String userInfo = uri.getUserInfo();
26+
if (userInfo != null) {
27+
String[] up = userInfo.split(":");
28+
if (up.length == 1) {
29+
proxy.username = up[0].isEmpty() ? null : up[0];
30+
} else {
31+
proxy.username = up[0].isEmpty() ? null : up[0];
32+
proxy.password = up[1].isEmpty() ? null : up[1];
33+
}
34+
}
35+
return proxy;
3236
}
3337

3438
public Proxy(String host, int port) {
3539
this(host, port, null);
3640
}
3741

3842
public Proxy(String host, int port, String scheme) {
39-
this(host, port, scheme, null, null);
43+
this.host = host;
44+
this.port = port;
45+
this.scheme = scheme;
4046
}
4147

4248
public Proxy(String host, int port, String username, String password) {
43-
this(host, port, null, username, password);
49+
this.host = host;
50+
this.port = port;
51+
this.username = username;
52+
this.password = password;
4453
}
4554

46-
public static Proxy create(final URI uri) {
47-
String userInfo = uri.getUserInfo();
48-
String username = null;
49-
String password = null;
50-
if (userInfo != null) {
51-
String[] up = userInfo.split(":");
52-
if (up.length == 1) {
53-
username = up[0].isEmpty() ? null : up[0];
54-
} else {
55-
username = up[0].isEmpty() ? null : up[0];
56-
password = up[1].isEmpty() ? null : up[1];
57-
}
58-
}
59-
return new Proxy(uri.getHost(), uri.getPort(), uri.getScheme(), username, password);
55+
public String getScheme() {
56+
return scheme;
6057
}
6158

62-
public String getHost() {
59+
public void setScheme(String scheme) {
60+
this.scheme = scheme;
61+
}
62+
63+
public String getHost() {
6364
return host;
6465
}
6566

6667
public int getPort() {
6768
return port;
6869
}
6970

70-
public String getScheme(){return scheme;}
71-
7271
public String getUsername() {
7372
return username;
7473
}

webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package us.codecraft.webmagic.proxy;
22

3+
import us.codecraft.webmagic.Page;
34
import us.codecraft.webmagic.Task;
45

56
/**
@@ -9,6 +10,32 @@
910
*/
1011
public interface ProxyProvider {
1112

13+
/**
14+
*
15+
* Returns the proxy to the provider once a download has completed.
16+
* @param proxy the proxy config, containing host, port and identity info
17+
* @param page the download result
18+
* @param task the download task
19+
*/
20+
void returnProxy(Proxy proxy, Page page, Task task);
21+
22+
/**
23+
* 代理IP是珍贵资源,有可能代理提供者内部代理没有过期,就一直提供某个IP,但这个IP又不可以使用,所以提供一种方式通知提供者,这个代理该刷新了
24+
*
25+
* @param task 下载任务
26+
* @param proxy 需要对代理进行验证,如果确实持有的是错误代理,则刷新,否则,继续执行
27+
*/
28+
void refreshProxy(Task task,Proxy proxy);
29+
30+
31+
/**
32+
*
33+
* 获取当前正在提供的代理
34+
*
35+
* @param task
36+
* @return
37+
*/
38+
Proxy getCurrentProxy(Task task);
1239

1340
/**
1441
* Get a proxy for task by some strategy.

0 commit comments

Comments
 (0)