|
13 | 13 | }, |
14 | 14 | { |
15 | 15 | "cell_type": "code", |
16 | | - "execution_count": 16, |
| 16 | + "execution_count": 1, |
17 | 17 | "metadata": { |
18 | 18 | "collapsed": true |
19 | 19 | }, |
|
22 | 22 | "import os\n", |
23 | 23 | "import hashlib\n", |
24 | 24 | "import requests\n", |
| 25 | + "import time\n", |
25 | 26 | "\n", |
26 | 27 | "from selenium import webdriver\n", |
27 | 28 | "from selenium.webdriver.common.keys import Keys\n", |
28 | 29 | "from selenium.webdriver.common.by import By\n", |
29 | | - "from selenium.webdriver.support.ui import WebDriverWait\n", |
30 | | - "from selenium.webdriver.support import expected_conditions as EC\n", |
31 | 30 | "\n", |
32 | 31 | "from fake_useragent import UserAgent\n", |
33 | 32 | "from PIL import Image\n", |
|
38 | 37 | }, |
39 | 38 | { |
40 | 39 | "cell_type": "code", |
41 | | - "execution_count": 28, |
42 | | - "metadata": {}, |
43 | | - "outputs": [], |
44 | | - "source": [ |
45 | | - "class ReloadImage(object):\n", |
46 | | - " def __init__(self, locator, xpath, compare):\n", |
47 | | - " self.locator = locator\n", |
48 | | - " self.xpath = xpath\n", |
49 | | - " self.compare = compare\n", |
50 | | - " def __call__(self, driver):\n", |
51 | | - " element = driver.find_element(self.locator, self.xpath)\n", |
52 | | - " attr = element.get_attribute('src')\n", |
53 | | - " if attr != self.compare:\n", |
54 | | - " return element\n", |
55 | | - " else:\n", |
56 | | - " return False" |
57 | | - ] |
58 | | - }, |
59 | | - { |
60 | | - "cell_type": "code", |
61 | | - "execution_count": 39, |
| 40 | + "execution_count": 2, |
62 | 41 | "metadata": {}, |
63 | 42 | "outputs": [ |
64 | 43 | { |
65 | 44 | "name": "stdout", |
66 | 45 | "output_type": "stream", |
67 | 46 | "text": [ |
68 | | - "Save img - /home/dirl/github/Python-Crawling-Tutorial/results/dcc521eea7e48f005685d58e770fc9be.JPEG\n", |
69 | | - "click\n", |
70 | | - "Save img - /home/dirl/github/Python-Crawling-Tutorial/results/6cf5653979671789ef664c9122f83f6f.JPEG\n", |
71 | | - "click\n", |
72 | | - "Message: chrome not reachable\n", |
73 | | - " (Session info: chrome=64.0.3282.119)\n", |
74 | | - " (Driver info: chromedriver=2.35.528139 (47ead77cb35ad2a9a83248b292151462a66cd881),platform=Linux 4.13.0-26-generic x86_64)\n", |
75 | | - "\n" |
| 47 | + "Save img - /home/dirl/github/Python-Crawling-Tutorial/results/ceecac6a5a9677750a69c80a87f26080.JPEG\n", |
| 48 | + "Save img - /home/dirl/github/Python-Crawling-Tutorial/results/ceecac6a5a9677750a69c80a87f26080.JPEG\n", |
| 49 | + "Save img - /home/dirl/github/Python-Crawling-Tutorial/results/4cce70c2cdde67af52e27920693da213.JPEG\n", |
| 50 | + "Save img - /home/dirl/github/Python-Crawling-Tutorial/results/1682c3490f1ec9df1da4a43407f890b7.JPEG\n", |
| 51 | + "Save img - /home/dirl/github/Python-Crawling-Tutorial/results/aa6a4d1bfa181fc53636a341562fb2ea.PNG\n" |
76 | 52 | ] |
77 | 53 | } |
78 | 54 | ], |
|
85 | 61 | " driver = webdriver.Chrome()\n", |
86 | 62 | " driver.get(url)\n", |
87 | 63 | " driver.maximize_window()\n", |
88 | | - " wait = WebDriverWait(driver, 10)\n", |
| 64 | + " driver.implicitly_wait(10)\n", |
89 | 65 | " compare_url = ''\n", |
90 | 66 | " \n", |
91 | 67 | " for i in range(5):\n", |
92 | 68 | " # get image\n", |
93 | | - " #img_el = driver.find_element(By.XPATH, '//div[@id=\"recaptcha_image\"]/img')\n", |
94 | | - " img_el = wait.until(ReloadImage(By.XPATH, '//div[@id=\"recaptcha_image\"]/img', compare_url))\n", |
| 69 | + " img_el = driver.find_element(By.XPATH, '//div[@id=\"recaptcha_image\"]/img')\n", |
95 | 70 | " img_url = img_el.get_attribute('src')\n", |
96 | 71 | " img_filename = hashlib.md5(img_url.encode('utf-8')).hexdigest()\n", |
97 | 72 | " compare_url = img_url\n", |
|
106 | 81 | " \n", |
107 | 82 | " # re-generate image\n", |
108 | 83 | " btn_refresh = driver.find_element(By.XPATH, '//*[@id=\"recaptcha_reload_btn\"]').click()\n", |
109 | | - " print('click')\n", |
| 84 | + " time.sleep(2)\n", |
110 | 85 | " \n", |
111 | 86 | "\n", |
112 | 87 | "except Exception as e:\n", |
|
0 commit comments