Skip to content

Commit 4a64e4d

Browse files
committed
Merge branch 'dev'
2 parents 5bb124f + 3a9faf4 commit 4a64e4d

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# 爬取 Google 搜尋結果第一頁的標題\n",
8+
"\n",
9+
"- 練習使用 BeautifulSoup 的 CSS selector"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 1,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import requests\n",
19+
"\n",
20+
"from bs4 import BeautifulSoup\n",
21+
"from urllib3.exceptions import HTTPError\n",
22+
"from urllib.parse import urljoin"
23+
]
24+
},
25+
{
26+
"cell_type": "code",
27+
"execution_count": 2,
28+
"metadata": {},
29+
"outputs": [],
30+
"source": [
31+
"base_url = 'https://www.google.com.tw/search'  # search endpoint the request is sent to\n",
32+
"query = dict(q='python')  # passed as params=, i.e. the ?q=python query string"
33+
]
34+
},
35+
{
36+
"cell_type": "code",
37+
"execution_count": 3,
38+
"metadata": {},
39+
"outputs": [
40+
{
41+
"name": "stdout",
42+
"output_type": "stream",
43+
"text": [
44+
"https://www.google.com.tw/search?q=python\n"
45+
]
46+
}
47+
],
48+
"source": [
49+
"try:\n",
50+
" resp = requests.get(base_url, params=query, timeout=10)  # bound the wait so a stalled connection cannot hang the cell\n",
51+
" resp.raise_for_status()  # treat 4xx/5xx replies as errors instead of parsing an error page\n",
52+
" soup = BeautifulSoup(resp.text, 'lxml')\n",
53+
" print(resp.url)\n",
54+
"except (requests.exceptions.RequestException, HTTPError) as err:  # requests raises its own exception types; the urllib3 one is kept for compatibility\n",
55+
" soup = None  # never leave a stale soup from an earlier run for the next cells\n",
56+
" print(err)"
57+
]
58+
},
59+
{
60+
"cell_type": "code",
61+
"execution_count": 4,
62+
"metadata": {},
63+
"outputs": [],
64+
"source": [
65+
"# print(soup.prettify())"
66+
]
67+
},
68+
{
69+
"cell_type": "code",
70+
"execution_count": 5,
71+
"metadata": {},
72+
"outputs": [
73+
{
74+
"name": "stdout",
75+
"output_type": "stream",
76+
"text": [
77+
"Welcome to Python.org\n",
78+
"https://www.google.com.tw/url?q=https://www.python.org/&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFggoMAA&usg=AOvVaw348GGzSkqgB-FXPinUSErY\n",
79+
"=======================================================================================\n",
80+
"Download Python | Python.org\n",
81+
"https://www.google.com.tw/url?q=https://www.python.org/downloads/&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFggzMAE&usg=AOvVaw2UHusa0FkZGKEoJRjlxYza\n",
82+
"=======================================================================================\n",
83+
"Python - 維基百科,自由的百科全書 - Wikipedia\n",
84+
"https://www.google.com.tw/url?q=https://zh.wikipedia.org/zh-tw/Python&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFgg5MAI&usg=AOvVaw1gsx_ugnMzjTP2nlH7zARm\n",
85+
"=======================================================================================\n",
86+
"一小時Python入門-part 1 - - 寫點科普\n",
87+
"https://www.google.com.tw/url?q=https://kopu.chat/2017/01/18/%25E4%25B8%2580%25E5%25B0%258F%25E6%2599%2582python%25E5%2585%25A5%25E9%2596%2580-part-1/&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFghEMAM&usg=AOvVaw1BLo112Hj6BBWauFDpnbQN\n",
88+
"=======================================================================================\n",
89+
"課程介紹- 成為python數據分析達人的第一課(自學課程) | 政治大學磨 ...\n",
90+
"https://www.google.com.tw/url?q=http://moocs.nccu.edu.tw/course/123&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFghOMAQ&usg=AOvVaw3RXTAa5ochrAyo-2evVdhI\n",
91+
"=======================================================================================\n",
92+
"《經濟學人》專文探討:「為什麼Python 是世上最屌的程式語言 ...\n",
93+
"https://www.google.com.tw/url?q=https://buzzorange.com/techorange/2018/08/01/python-a-skr-language/&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFghTMAU&usg=AOvVaw2yA2hrrl61qBKnKoEeeTix\n",
94+
"=======================================================================================\n",
95+
"Python 入門| Django Girls Taipei\n",
96+
"https://www.google.com.tw/url?q=http://djangogirlstaipei.herokuapp.com/tutorials/python/&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFghZMAY&usg=AOvVaw0ha-itZMKnVgaSsRQlcutt\n",
97+
"=======================================================================================\n",
98+
"Python Tutorial: Learn Python For Free | Codecademy\n",
99+
"https://www.google.com.tw/url?q=https://www.codecademy.com/learn/learn-python&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFghfMAc&usg=AOvVaw09DURYBaIbVzO6GSXKb0gH\n",
100+
"=======================================================================================\n",
101+
"Python Tutorial - W3Schools\n",
102+
"https://www.google.com.tw/url?q=https://www.w3schools.com/python/&sa=U&ved=0ahUKEwj-8PeJzfvdAhUEa7wKHfNzBOoQFghlMAg&usg=AOvVaw2kfYHx2obM5EhxwIrSMn-4\n",
103+
"=======================================================================================\n"
104+
]
105+
}
106+
],
107+
"source": [
108+
"# Print each matched anchor's text, its href resolved against base_url, and a divider line.\n",
109+
"search_results = soup.select('div.g > h3.r > a[href^=\"/url\"]')\n",
110+
"divider = '=' * 87\n",
111+
"for anchor in search_results:\n",
112+
" print(anchor.text, urljoin(base_url, anchor['href']), divider, sep='\\n')"
113+
]
114+
}
115+
],
116+
"metadata": {
117+
"kernelspec": {
118+
"display_name": "Python 3",
119+
"language": "python",
120+
"name": "python3"
121+
},
122+
"language_info": {
123+
"codemirror_mode": {
124+
"name": "ipython",
125+
"version": 3
126+
},
127+
"file_extension": ".py",
128+
"mimetype": "text/x-python",
129+
"name": "python",
130+
"nbconvert_exporter": "python",
131+
"pygments_lexer": "ipython3",
132+
"version": "3.6.6"
133+
}
134+
},
135+
"nbformat": 4,
136+
"nbformat_minor": 2
137+
}

0 commit comments

Comments
 (0)