Skip to content

Commit 05a1f39

Browse files
committed
Merge pull request #193 from EdwardsBean/fix-mppipeline
Bug fix: MultiPagePipeline and DoubleKeyMap concurrency bug
2 parents 6b9d21f + 74962d6 commit 05a1f39

File tree

2 files changed

+47
-36
lines changed

2 files changed

+47
-36
lines changed

webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java

Lines changed: 44 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -36,51 +36,61 @@ public void process(ResultItems resultItems, Task task) {
3636
private void handleObject(Iterator<Map.Entry<String, Object>> iterator) {
3737
Map.Entry<String, Object> objectEntry = iterator.next();
3838
Object o = objectEntry.getValue();
39+
//需要拼凑
3940
if (o instanceof MultiPageModel) {
4041
MultiPageModel multiPageModel = (MultiPageModel) o;
41-
pageMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), Boolean.TRUE);
42-
if (multiPageModel.getOtherPages() != null) {
43-
for (String otherPage : multiPageModel.getOtherPages()) {
44-
Boolean aBoolean = pageMap.get(multiPageModel.getPageKey(), otherPage);
45-
if (aBoolean == null) {
46-
pageMap.put(multiPageModel.getPageKey(), otherPage, Boolean.FALSE);
42+
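// Make sure the sub-map for this page key exists so it can serve as the lock object below; the page is marked as complete inside the synchronized block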
43+
pageMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), Boolean.FALSE);
44+
//每个key单独加锁
45+
synchronized (pageMap.get(multiPageModel.getPageKey())) {
46+
pageMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), Boolean.TRUE);
47+
//其他需要拼凑的部分
48+
if (multiPageModel.getOtherPages() != null) {
49+
for (String otherPage : multiPageModel.getOtherPages()) {
50+
Boolean aBoolean = pageMap.get(multiPageModel.getPageKey(), otherPage);
51+
if (aBoolean == null) {
52+
pageMap.put(multiPageModel.getPageKey(), otherPage, Boolean.FALSE);
53+
}
4754
}
4855
}
49-
}
50-
//check if all pages are processed
51-
Map<String, Boolean> booleanMap = pageMap.get(multiPageModel.getPageKey());
52-
objectMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), multiPageModel);
53-
if (booleanMap == null) {
54-
return;
55-
}
56-
for (Map.Entry<String, Boolean> stringBooleanEntry : booleanMap.entrySet()) {
57-
if (!stringBooleanEntry.getValue()) {
58-
iterator.remove();
56+
//check if all pages are processed
57+
Map<String, Boolean> booleanMap = pageMap.get(multiPageModel.getPageKey());
58+
objectMap.put(multiPageModel.getPageKey(), multiPageModel.getPage(), multiPageModel);
59+
if (booleanMap == null) {
5960
return;
6061
}
61-
}
62-
List<Map.Entry<String, MultiPageModel>> entryList = new ArrayList<Map.Entry<String, MultiPageModel>>();
63-
entryList.addAll(objectMap.get(multiPageModel.getPageKey()).entrySet());
64-
if (entryList.size() != 0) {
65-
Collections.sort(entryList, new Comparator<Map.Entry<String, MultiPageModel>>() {
66-
@Override
67-
public int compare(Map.Entry<String, MultiPageModel> o1, Map.Entry<String, MultiPageModel> o2) {
68-
try {
69-
int i1 = Integer.parseInt(o1.getKey());
70-
int i2 = Integer.parseInt(o2.getKey());
71-
return i1 - i2;
72-
} catch (NumberFormatException e) {
73-
return o1.getKey().compareTo(o2.getKey());
62+
// Filter: if the item completed by this page still has parts that are not fully assembled, it does not proceed to the next pipeline
63+
for (Map.Entry<String, Boolean> stringBooleanEntry : booleanMap.entrySet()) {
64+
if (!stringBooleanEntry.getValue()) {
65+
iterator.remove();
66+
return;
67+
}
68+
}
69+
List<Map.Entry<String, MultiPageModel>> entryList = new ArrayList<Map.Entry<String, MultiPageModel>>();
70+
entryList.addAll(objectMap.get(multiPageModel.getPageKey()).entrySet());
71+
if (entryList.size() != 0) {
72+
Collections.sort(entryList, new Comparator<Map.Entry<String, MultiPageModel>>() {
73+
@Override
74+
public int compare(Map.Entry<String, MultiPageModel> o1, Map.Entry<String, MultiPageModel> o2) {
75+
try {
76+
int i1 = Integer.parseInt(o1.getKey());
77+
int i2 = Integer.parseInt(o2.getKey());
78+
return i1 - i2;
79+
} catch (NumberFormatException e) {
80+
return o1.getKey().compareTo(o2.getKey());
81+
}
7482
}
83+
});
84+
// 合并
85+
MultiPageModel value = entryList.get(0).getValue();
86+
for (int i = 1; i < entryList.size(); i++) {
87+
value = value.combine(entryList.get(i).getValue());
7588
}
76-
});
77-
MultiPageModel value = entryList.get(0).getValue();
78-
for (int i = 1; i < entryList.size(); i++) {
79-
value = value.combine(entryList.get(i).getValue());
89+
objectEntry.setValue(value);
8090
}
81-
objectEntry.setValue(value);
8291
}
8392
}
93+
8494
}
8595

8696
}

webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ public V put(K1 key1, Map<K2, V> submap) {
7575
* @param value
7676
* @return value
7777
*/
78-
public V put(K1 key1, K2 key2, V value) {
78+
public synchronized V put(K1 key1, K2 key2, V value) {
7979
if (map.get(key1) == null) {
80+
//不加锁的话,多个线程有可能都会执行到这里
8081
map.put(key1, this.<K2, V>newMap());
8182
}
8283
return get(key1).put(key2, value);
@@ -87,7 +88,7 @@ public V put(K1 key1, K2 key2, V value) {
8788
* @param key2
8889
* @return value
8990
*/
90-
public V remove(K1 key1, K2 key2) {
91+
public synchronized V remove(K1 key1, K2 key2) {
9192
if (get(key1) == null) {
9293
return null;
9394
}

0 commit comments

Comments
 (0)