1 file changed: +95 -0 lines changed
Original file line number | Diff line number | Diff line change
1+ {
2+ "cells" : [
3+ {
4+ "cell_type" : " markdown" ,
5+ "metadata" : {},
6+ "source" : [
7+ " # 練習\n " ,
8+ " \n " ,
9+ " - 觀察 http://www.taipeibo.com/yearly/ 並撰寫爬蟲程式\n " ,
10+ " - 抓取當年度週末票房冠軍排行榜\n " ,
11+ " - 使用 requests + BeautifulSoup 實作\n " ,
12+ " - 透過 pandas 輸出成 CSV 檔"
13+ ]
14+ },
15+ {
16+ "cell_type" : " code" ,
17+ "execution_count" : 2 ,
18+ "metadata" : {
19+ "collapsed" : true
20+ },
21+ "outputs" : [],
22+ "source" : [
23+ " import requests\n " ,
24+ " import pandas as pd\n " ,
25+ " \n " ,
26+ " from bs4 import BeautifulSoup\n " ,
27+ " \n " ,
28+ " url = 'http://www.taipeibo.com/yearly/'"
29+ ]
30+ },
31+ {
32+ "cell_type" : " code" ,
33+ "execution_count" : 5 ,
34+ "metadata" : {
35+ "collapsed" : true
36+ },
37+ "outputs" : [],
38+ "source" : [
39+ " resp = requests.get(url)\n " ,
40+ " resp.encoding = 'utf-8'\n " ,
41+ " soup = BeautifulSoup(resp.text, 'lxml')"
42+ ]
43+ },
44+ {
45+ "cell_type" : " code" ,
46+ "execution_count" : 7 ,
47+ "metadata" : {},
48+ "outputs" : [
49+ {
50+ "name" : " stdout" ,
51+ "output_type" : " stream" ,
52+ "text" : [
53+ " 5\n " ,
54+ " 4\n " ,
55+ " <tr class=\" tb-top\" >\n " ,
56+ " <th>週次</th>\n " ,
57+ " <th>日期</th>\n " ,
58+ " <th>週末票房總和</th>\n " ,
59+ " <th>漲跌幅</th>\n " ,
60+ " <th>冠軍片名</th>\n " ,
61+ " <th>英文片名</th>\n " ,
62+ " <th class=\" import\" >週末票房冠軍</th>\n " ,
63+ " <th>冠軍比例*</th>\n " ,
64+ " </tr>\n "
65+ ]
66+ }
67+ ],
68+ "source" : [
69+ " rows = soup.table.find_all('tr')\n " ,
70+ " colname = rows.pop(0)"
71+ ]
72+ }
73+ ],
74+ "metadata" : {
75+ "kernelspec" : {
76+ "display_name" : " Python 3" ,
77+ "language" : " python" ,
78+ "name" : " python3"
79+ },
80+ "language_info" : {
81+ "codemirror_mode" : {
82+ "name" : " ipython" ,
83+ "version" : 3
84+ },
85+ "file_extension" : " .py" ,
86+ "mimetype" : " text/x-python" ,
87+ "name" : " python" ,
88+ "nbconvert_exporter" : " python" ,
89+ "pygments_lexer" : " ipython3" ,
90+ "version" : " 3.5.2"
91+ }
92+ },
93+ "nbformat" : 4 ,
94+ "nbformat_minor" : 2
95+ }
You can’t perform that action at this time.
0 commit comments