Skip to content

Commit 1770a11

Browse files
committed
fixed: code
1 parent ccd6e16 commit 1770a11

File tree

1 file changed

+11
-36
lines changed

1 file changed

+11
-36
lines changed

practice/09_crawling_reCAPTCHA_image.ipynb

Lines changed: 11 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
},
1414
{
1515
"cell_type": "code",
16-
"execution_count": 16,
16+
"execution_count": 1,
1717
"metadata": {
1818
"collapsed": true
1919
},
@@ -22,12 +22,11 @@
2222
"import os\n",
2323
"import hashlib\n",
2424
"import requests\n",
25+
"import time\n",
2526
"\n",
2627
"from selenium import webdriver\n",
2728
"from selenium.webdriver.common.keys import Keys\n",
2829
"from selenium.webdriver.common.by import By\n",
29-
"from selenium.webdriver.support.ui import WebDriverWait\n",
30-
"from selenium.webdriver.support import expected_conditions as EC\n",
3130
"\n",
3231
"from fake_useragent import UserAgent\n",
3332
"from PIL import Image\n",
@@ -38,41 +37,18 @@
3837
},
3938
{
4039
"cell_type": "code",
41-
"execution_count": 28,
42-
"metadata": {},
43-
"outputs": [],
44-
"source": [
45-
"class ReloadImage(object):\n",
46-
" def __init__(self, locator, xpath, compare):\n",
47-
" self.locator = locator\n",
48-
" self.xpath = xpath\n",
49-
" self.compare = compare\n",
50-
" def __call__(self, driver):\n",
51-
" element = driver.find_element(self.locator, self.xpath)\n",
52-
" attr = element.get_attribute('src')\n",
53-
" if attr != self.compare:\n",
54-
" return element\n",
55-
" else:\n",
56-
" return False"
57-
]
58-
},
59-
{
60-
"cell_type": "code",
61-
"execution_count": 39,
40+
"execution_count": 2,
6241
"metadata": {},
6342
"outputs": [
6443
{
6544
"name": "stdout",
6645
"output_type": "stream",
6746
"text": [
68-
"Save img - /home/dirl/github/Python-Crawling-Tutorial/results/dcc521eea7e48f005685d58e770fc9be.JPEG\n",
69-
"click\n",
70-
"Save img - /home/dirl/github/Python-Crawling-Tutorial/results/6cf5653979671789ef664c9122f83f6f.JPEG\n",
71-
"click\n",
72-
"Message: chrome not reachable\n",
73-
" (Session info: chrome=64.0.3282.119)\n",
74-
" (Driver info: chromedriver=2.35.528139 (47ead77cb35ad2a9a83248b292151462a66cd881),platform=Linux 4.13.0-26-generic x86_64)\n",
75-
"\n"
47+
"Save img - /home/dirl/github/Python-Crawling-Tutorial/results/ceecac6a5a9677750a69c80a87f26080.JPEG\n",
48+
"Save img - /home/dirl/github/Python-Crawling-Tutorial/results/ceecac6a5a9677750a69c80a87f26080.JPEG\n",
49+
"Save img - /home/dirl/github/Python-Crawling-Tutorial/results/4cce70c2cdde67af52e27920693da213.JPEG\n",
50+
"Save img - /home/dirl/github/Python-Crawling-Tutorial/results/1682c3490f1ec9df1da4a43407f890b7.JPEG\n",
51+
"Save img - /home/dirl/github/Python-Crawling-Tutorial/results/aa6a4d1bfa181fc53636a341562fb2ea.PNG\n"
7652
]
7753
}
7854
],
@@ -85,13 +61,12 @@
8561
" driver = webdriver.Chrome()\n",
8662
" driver.get(url)\n",
8763
" driver.maximize_window()\n",
88-
" wait = WebDriverWait(driver, 10)\n",
64+
" driver.implicitly_wait(10)\n",
8965
" compare_url = ''\n",
9066
" \n",
9167
" for i in range(5):\n",
9268
" # get image\n",
93-
" #img_el = driver.find_element(By.XPATH, '//div[@id=\"recaptcha_image\"]/img')\n",
94-
" img_el = wait.until(ReloadImage(By.XPATH, '//div[@id=\"recaptcha_image\"]/img', compare_url))\n",
69+
" img_el = driver.find_element(By.XPATH, '//div[@id=\"recaptcha_image\"]/img')\n",
9570
" img_url = img_el.get_attribute('src')\n",
9671
" img_filename = hashlib.md5(img_url.encode('utf-8')).hexdigest()\n",
9772
" compare_url = img_url\n",
@@ -106,7 +81,7 @@
10681
" \n",
10782
" # re-generate image\n",
10883
" btn_refresh = driver.find_element(By.XPATH, '//*[@id=\"recaptcha_reload_btn\"]').click()\n",
109-
" print('click')\n",
84+
" time.sleep(2)\n",
11085
" \n",
11186
"\n",
11287
"except Exception as e:\n",

0 commit comments

Comments
 (0)