Skip to content

Commit 64248cb

Browse files
committed
add: Graph API practice
1 parent 1770a11 commit 64248cb

File tree

3 files changed

+850
-0
lines changed

3 files changed

+850
-0
lines changed
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# 練習\n",
8+
"\n",
9+
"- 取得 FB 文章底下所有留言\n",
10+
"- 使用 [Graph API](https://developers.facebook.com/tools/explorer/)\n",
11+
"- https://www.facebook.com/DoctorKoWJ/videos/1213927345375910/\n",
12+
"- 輸出成 CSV"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 1,
18+
"metadata": {
19+
"collapsed": true
20+
},
21+
"outputs": [],
22+
"source": [
23+
"import os\n",
24+
"import requests\n",
25+
"import pandas as pd\n",
26+
"\n",
27+
"from datetime import datetime"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 2,
33+
"metadata": {
34+
"collapsed": true
35+
},
36+
"outputs": [],
37+
"source": [
38+
"# Article ID and access token (both can be looked up with the Graph API Explorer).\n",
39+
"article_id = '1213927345375910'\n",
40+
"token = os.environ.get('FB_ACCESS_TOKEN', '')  # read from the environment so the secret is never saved in the notebook"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 3,
46+
"metadata": {},
47+
"outputs": [
48+
{
49+
"name": "stdout",
50+
"output_type": "stream",
51+
"text": [
52+
"pages 1\n",
53+
"pages 2\n",
54+
"pages 3\n",
55+
"pages 4\n",
56+
"EOF\n",
57+
"comment length = 431\n"
58+
]
59+
}
60+
],
61+
"source": [
62+
"# Collect every comment on the article, 100 per request, by following the 'next' links.\n",
63+
"comments = []\n",
64+
"pages = 0\n",
65+
"url = 'https://graph.facebook.com/v2.11/{}/comments?pretty=0&limit={}&access_token={}'.format(\n",
66+
"    article_id, 100, token\n",
67+
")\n",
68+
"\n",
69+
"while True:\n",
70+
"    pages += 1\n",
71+
"    resp = requests.get(url)\n",
72+
"    resp.raise_for_status()  # report HTTP errors here instead of as a KeyError on 'data'\n",
73+
"    data = resp.json()\n",
74+
"    comments += data.get('data', [])\n",
75+
"    # 'paging' may be absent (presumably when nothing is returned), so look up 'next' defensively.\n",
76+
"    next_url = data.get('paging', {}).get('next')\n",
77+
"    if not next_url:\n",
78+
"        print('EOF')\n",
79+
"        break\n",
80+
"    url = next_url\n",
81+
"    print('pages {}'.format(pages))\n",
82+
"print('comment length = {}'.format(len(comments)))"
83+
]
84+
},
85+
{
86+
"cell_type": "code",
87+
"execution_count": 4,
88+
"metadata": {},
89+
"outputs": [
90+
{
91+
"data": {
92+
"text/html": [
93+
"<div>\n",
94+
"<style>\n",
95+
" .dataframe thead tr:only-child th {\n",
96+
" text-align: right;\n",
97+
" }\n",
98+
"\n",
99+
" .dataframe thead th {\n",
100+
" text-align: left;\n",
101+
" }\n",
102+
"\n",
103+
" .dataframe tbody tr th {\n",
104+
" vertical-align: top;\n",
105+
" }\n",
106+
"</style>\n",
107+
"<table border=\"1\" class=\"dataframe\">\n",
108+
" <thead>\n",
109+
" <tr style=\"text-align: right;\">\n",
110+
" <th></th>\n",
111+
" <th>created_time</th>\n",
112+
" <th>from</th>\n",
113+
" <th>id</th>\n",
114+
" <th>message</th>\n",
115+
" </tr>\n",
116+
" </thead>\n",
117+
" <tbody>\n",
118+
" <tr>\n",
119+
" <th>0</th>\n",
120+
" <td>2018-01-09T11:02:42+0000</td>\n",
121+
" <td>NaN</td>\n",
122+
" <td>1213927345375910_1213982232037088</td>\n",
123+
" <td>市長,謝謝您注意到這個議題。但是,不知道您是否同時有發現,比起醫療環境,更加威脅台灣幼兒的,...</td>\n",
124+
" </tr>\n",
125+
" <tr>\n",
126+
" <th>1</th>\n",
127+
" <td>2018-01-09T11:07:44+0000</td>\n",
128+
" <td>NaN</td>\n",
129+
" <td>1213927345375910_1213985318703446</td>\n",
130+
" <td>我希望如果有天你有能力了,可以為被虐的兒童提出修法保護,更另闢一個無力撫養孩子的人一個出口,...</td>\n",
131+
" </tr>\n",
132+
" <tr>\n",
133+
" <th>2</th>\n",
134+
" <td>2018-01-09T11:21:33+0000</td>\n",
135+
" <td>NaN</td>\n",
136+
" <td>1213927345375910_1213993592035952</td>\n",
137+
" <td>我也是重症兒童家屬\\n感謝你的發言\\n我第一次看到有政治人物願意大聲疾呼\\n但不是說沒有其他...</td>\n",
138+
" </tr>\n",
139+
" <tr>\n",
140+
" <th>3</th>\n",
141+
" <td>2018-01-09T09:34:35+0000</td>\n",
142+
" <td>NaN</td>\n",
143+
" <td>1213927345375910_1213934828708495</td>\n",
144+
" <td>每次看到你就覺得台灣還有希望\\n不在乎選票在乎的是人</td>\n",
145+
" </tr>\n",
146+
" <tr>\n",
147+
" <th>4</th>\n",
148+
" <td>2018-01-09T11:28:25+0000</td>\n",
149+
" <td>NaN</td>\n",
150+
" <td>1213927345375910_1213997665368878</td>\n",
151+
" <td>每當我覺得天下的烏鴉一般黑的時候 看到你的發文 又讓我覺得繼續奮鬥 台灣會被照亮的 柯文哲 ...</td>\n",
152+
" </tr>\n",
153+
" </tbody>\n",
154+
"</table>\n",
155+
"</div>"
156+
],
157+
"text/plain": [
158+
" created_time from id \\\n",
159+
"0 2018-01-09T11:02:42+0000 NaN 1213927345375910_1213982232037088 \n",
160+
"1 2018-01-09T11:07:44+0000 NaN 1213927345375910_1213985318703446 \n",
161+
"2 2018-01-09T11:21:33+0000 NaN 1213927345375910_1213993592035952 \n",
162+
"3 2018-01-09T09:34:35+0000 NaN 1213927345375910_1213934828708495 \n",
163+
"4 2018-01-09T11:28:25+0000 NaN 1213927345375910_1213997665368878 \n",
164+
"\n",
165+
" message \n",
166+
"0 市長,謝謝您注意到這個議題。但是,不知道您是否同時有發現,比起醫療環境,更加威脅台灣幼兒的,... \n",
167+
"1 我希望如果有天你有能力了,可以為被虐的兒童提出修法保護,更另闢一個無力撫養孩子的人一個出口,... \n",
168+
"2 我也是重症兒童家屬\\n感謝你的發言\\n我第一次看到有政治人物願意大聲疾呼\\n但不是說沒有其他... \n",
169+
"3 每次看到你就覺得台灣還有希望\\n不在乎選票在乎的是人 \n",
170+
"4 每當我覺得天下的烏鴉一般黑的時候 看到你的發文 又讓我覺得繼續奮鬥 台灣會被照亮的 柯文哲 ... "
171+
]
172+
},
173+
"execution_count": 4,
174+
"metadata": {},
175+
"output_type": "execute_result"
176+
}
177+
],
178+
"source": [
179+
"df = pd.DataFrame.from_records(data=comments)\n",
180+
"df.head(n=5)"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": 5,
186+
"metadata": {},
187+
"outputs": [
188+
{
189+
"name": "stdout",
190+
"output_type": "stream",
191+
"text": [
192+
"Save file - /home/dirl/github/Python-Crawling-Tutorial/results/1213927345375910.csv\n"
193+
]
194+
}
195+
],
196+
"source": [
197+
"# Write the comments to ../results/<article_id>.csv (relative to the working directory).\n",
198+
"results = os.path.abspath('../results')\n",
199+
"os.makedirs(results, exist_ok=True)  # idempotent: avoids the exists()/makedirs() race\n",
200+
"\n",
201+
"filename = os.path.join(results, '{}.csv'.format(article_id))\n",
202+
"df.to_csv(filename, index=False)\n",
203+
"print('Save file - {}'.format(filename))"
204+
]
205+
}
206+
],
207+
"metadata": {
208+
"kernelspec": {
209+
"display_name": "Python 3",
210+
"language": "python",
211+
"name": "python3"
212+
},
213+
"language_info": {
214+
"codemirror_mode": {
215+
"name": "ipython",
216+
"version": 3
217+
},
218+
"file_extension": ".py",
219+
"mimetype": "text/x-python",
220+
"name": "python",
221+
"nbconvert_exporter": "python",
222+
"pygments_lexer": "ipython3",
223+
"version": "3.5.2"
224+
}
225+
},
226+
"nbformat": 4,
227+
"nbformat_minor": 2
228+
}

0 commit comments

Comments
 (0)