afunTW
diff --git a/‎practice/10_facebook_crawling_article_comments.ipynb‎
Lines changed: 228 additions & 0 deletions b/‎practice/10_facebook_crawling_article_comments.ipynb‎
Lines changed: 228 additions & 0 deletions
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 練習\n",
+    "\n",
+    "- 取得 FB 文章底下所有留言\n",
+    "- 使用 [Graph API](https://developers.facebook.com/tools/explorer/)\n",
+    "- https://www.facebook.com/DoctorKoWJ/videos/1213927345375910/\n",
+    "- 輸出成 CSV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import requests\n",
+    "import pandas as pd\n",
+    "\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 透過 Graph API 觀察文章 ID 與 token\n",
+    "article_id = '1213927345375910'\n",
+    "token = ''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "pages 1\n",
+      "pages 2\n",
+      "pages 3\n",
+      "pages 4\n",
+      "EOF\n",
+      "comment length = 431\n"
+     ]
+    }
+   ],
+   "source": [
+    "comments = []\n",
+    "pages = 0\n",
+    "\n",
+    "url = 'https://graph.facebook.com/v2.11/{}/comments?pretty=0&limit={}&access_token={}'.format(\n",
+    "    article_id, 100, token\n",
+    ")\n",
+    "\n",
+    "while True:\n",
+    "    pages += 1\n",
+    "    resp = requests.get(url)\n",
+    "    data = resp.json()\n",
+    "    comments += data['data']\n",
+    "    \n",
+    "    if 'next' not in data['paging']:\n",
+    "        print('EOF')\n",
+    "        break\n",
+    "    else:\n",
+    "        url = data['paging']['next']\n",
+    "        print('pages {}'.format(pages))\n",
+    "        \n",
+    "print('comment length = {}'.format(len(comments)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>created_time</th>\n",
+       "      <th>from</th>\n",
+       "      <th>id</th>\n",
+       "      <th>message</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2018-01-09T11:02:42+0000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1213927345375910_1213982232037088</td>\n",
+       "      <td>市長，謝謝您注意到這個議題。但是，不知道您是否同時有發現，比起醫療環境，更加威脅台灣幼兒的，...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2018-01-09T11:07:44+0000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1213927345375910_1213985318703446</td>\n",
+       "      <td>我希望如果有天你有能力了，可以為被虐的兒童提出修法保護，更另闢一個無力撫養孩子的人一個出口，...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2018-01-09T11:21:33+0000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1213927345375910_1213993592035952</td>\n",
+       "      <td>我也是重症兒童家屬\\n感謝你的發言\\n我第一次看到有政治人物願意大聲疾呼\\n但不是說沒有其他...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2018-01-09T09:34:35+0000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1213927345375910_1213934828708495</td>\n",
+       "      <td>每次看到你就覺得台灣還有希望\\n不在乎選票在乎的是人</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2018-01-09T11:28:25+0000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1213927345375910_1213997665368878</td>\n",
+       "      <td>每當我覺得天下的烏鴉一般黑的時候 看到你的發文 又讓我覺得繼續奮鬥 台灣會被照亮的 柯文哲 ...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               created_time from                                 id  \\\n",
+       "0  2018-01-09T11:02:42+0000  NaN  1213927345375910_1213982232037088   \n",
+       "1  2018-01-09T11:07:44+0000  NaN  1213927345375910_1213985318703446   \n",
+       "2  2018-01-09T11:21:33+0000  NaN  1213927345375910_1213993592035952   \n",
+       "3  2018-01-09T09:34:35+0000  NaN  1213927345375910_1213934828708495   \n",
+       "4  2018-01-09T11:28:25+0000  NaN  1213927345375910_1213997665368878   \n",
+       "\n",
+       "                                             message  \n",
+       "0  市長，謝謝您注意到這個議題。但是，不知道您是否同時有發現，比起醫療環境，更加威脅台灣幼兒的，...  \n",
+       "1  我希望如果有天你有能力了，可以為被虐的兒童提出修法保護，更另闢一個無力撫養孩子的人一個出口，...  \n",
+       "2  我也是重症兒童家屬\\n感謝你的發言\\n我第一次看到有政治人物願意大聲疾呼\\n但不是說沒有其他...  \n",
+       "3                         每次看到你就覺得台灣還有希望\\n不在乎選票在乎的是人  \n",
+       "4  每當我覺得天下的烏鴉一般黑的時候 看到你的發文 又讓我覺得繼續奮鬥 台灣會被照亮的 柯文哲 ...  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = pd.DataFrame.from_records(comments)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Save file - /home/dirl/github/Python-Crawling-Tutorial/results/1213927345375910.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "results = os.path.abspath('../results')\n",
+    "if not os.path.exists(results):\n",
+    "    os.makedirs(results)\n",
+    "\n",
+    "filename = os.path.join(results, '{}.csv'.format(article_id))\n",
+    "df.to_csv(filename, index=False)\n",
+    "print('Save file - {}'.format(filename))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}