|
17 | 17 | "colab": { |
18 | 18 | "base_uri": "https://localhost:8080/" |
19 | 19 | }, |
20 | | - "id": "xC9f1uA-OX8J", |
21 | | - "outputId": "8596b211-ad7c-4efd-8fc1-bbbc4babd2e2" |
| 20 | + "id": "UBnT5t_LiCU2", |
| 21 | + "outputId": "ca0bcea9-75a7-4237-e58e-154c3d72e89f" |
22 | 22 | }, |
23 | 23 | "outputs": [ |
24 | 24 | { |
25 | 25 | "name": "stdout", |
26 | 26 | "output_type": "stream", |
27 | 27 | "text": [ |
28 | | - "Requirement already satisfied: fasttext==0.9.2 in /usr/local/lib/python3.7/dist-packages (0.9.2)\n", |
29 | | - "Requirement already satisfied: setuptools>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from fasttext==0.9.2) (56.1.0)\n", |
30 | | - "Requirement already satisfied: pybind11>=2.2 in /usr/local/lib/python3.7/dist-packages (from fasttext==0.9.2) (2.6.2)\n", |
31 | | - "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from fasttext==0.9.2) (1.19.5)\n" |
| 28 | + "Requirement already satisfied: pandas==1.1.5 in /usr/local/lib/python3.7/dist-packages (1.1.5)\n", |
| 29 | + "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas==1.1.5) (2018.9)\n", |
| 30 | + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas==1.1.5) (2.8.1)\n", |
| 31 | + "Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.7/dist-packages (from pandas==1.1.5) (1.19.5)\n", |
| 32 | + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas==1.1.5) (1.15.0)\n", |
| 33 | + "Collecting wget==3.2\n", |
| 34 | + " Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip\n", |
| 35 | + "Building wheels for collected packages: wget\n", |
| 36 | + " Building wheel for wget (setup.py) ... \u001b[?25l\u001b[?25hdone\n", |
| 37 | + " Created wheel for wget: filename=wget-3.2-cp37-none-any.whl size=9675 sha256=0e1e014b6bf086637aea4bfe15707b7d8d825e7280cd2f9c6ec1943ef00e80c7\n", |
| 38 | + " Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f\n", |
| 39 | + "Successfully built wget\n", |
| 40 | + "Installing collected packages: wget\n", |
| 41 | + "Successfully installed wget-3.2\n", |
| 42 | + "Collecting fasttext==0.9.2\n", |
| 43 | + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f8/85/e2b368ab6d3528827b147fdb814f8189acc981a4bc2f99ab894650e05c40/fasttext-0.9.2.tar.gz (68kB)\n", |
| 44 | + "\u001b[K |████████████████████████████████| 71kB 6.8MB/s \n", |
| 45 | + "\u001b[?25hRequirement already satisfied: pybind11>=2.2 in /usr/local/lib/python3.7/dist-packages (from fasttext==0.9.2) (2.6.2)\n", |
| 46 | + "Requirement already satisfied: setuptools>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from fasttext==0.9.2) (57.0.0)\n", |
| 47 | + "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from fasttext==0.9.2) (1.19.5)\n", |
| 48 | + "Building wheels for collected packages: fasttext\n", |
| 49 | + " Building wheel for fasttext (setup.py) ... \u001b[?25l\u001b[?25hdone\n", |
| 50 | + " Created wheel for fasttext: filename=fasttext-0.9.2-cp37-cp37m-linux_x86_64.whl size=3091748 sha256=f30effec512519a72b11f0eaf7aa8a6b57df1643345f8e51bf7b1cb010552792\n", |
| 51 | + " Stored in directory: /root/.cache/pip/wheels/98/ba/7f/b154944a1cf5a8cee91c154b75231136cc3a3321ab0e30f592\n", |
| 52 | + "Successfully built fasttext\n", |
| 53 | + "Installing collected packages: fasttext\n", |
| 54 | + "Successfully installed fasttext-0.9.2\n" |
32 | 55 | ] |
33 | 56 | } |
34 | 57 | ], |
35 | 58 | "source": [ |
36 | | - "!pip install fasttext==0.9.2" |
| 59 | + "# To install only the requirements of this notebook, run this cell\n", |
| 60 | + "\n", |
| 61 | + "# ===========================\n", |
| 62 | + "\n", |
| 63 | + "!pip install pandas==1.1.5\n", |
| 64 | + "!pip install wget==3.2\n", |
| 65 | + "!pip install fasttext==0.9.2\n", |
| 66 | + "\n", |
| 67 | + "# ===========================" |
37 | 68 | ] |
38 | 69 | }, |
39 | 70 | { |
40 | 71 | "cell_type": "code", |
41 | 72 | "execution_count": 2, |
| 73 | + "metadata": { |
| 74 | + "id": "zrBi6bvbiCU4" |
| 75 | + }, |
| 76 | + "outputs": [], |
| 77 | + "source": [ |
| 78 | + "# To install the requirements for the entire chapter, uncomment the lines below and run this cell\n", |
| 79 | + "\n", |
| 80 | + "# ===========================\n", |
| 81 | + "\n", |
| 82 | + "# try:\n", |
| 83 | + "# import google.colab\n", |
| 84 | + "# !curl https://raw.githubusercontent.com/practical-nlp/practical-nlp/master/Ch4/ch4-requirements.txt | xargs -n 1 -L 1 pip install\n", |
| 85 | + "# except ModuleNotFoundError:\n", |
| 86 | + "# !pip install -r \"ch4-requirements.txt\"\n", |
| 87 | + "\n", |
| 88 | + "# ===========================" |
| 89 | + ] |
| 90 | + }, |
| 91 | + { |
| 92 | + "cell_type": "code", |
| 93 | + "execution_count": 3, |
42 | 94 | "metadata": { |
43 | 95 | "id": "YKgZXvTGb61z" |
44 | 96 | }, |
|
53 | 105 | }, |
54 | 106 | { |
55 | 107 | "cell_type": "code", |
56 | | - "execution_count": 3, |
| 108 | + "execution_count": 4, |
57 | 109 | "metadata": { |
58 | 110 | "colab": { |
59 | 111 | "base_uri": "https://localhost:8080/" |
60 | 112 | }, |
61 | 113 | "id": "l6CfW7C3L4EB", |
62 | | - "outputId": "53b9c39f-41fb-4a51-af80-9abc1deb89a8" |
| 114 | + "outputId": "debf3639-77d2-4a2c-8aa1-3ff8438b9585" |
63 | 115 | }, |
64 | 116 | "outputs": [ |
65 | 117 | { |
66 | 118 | "name": "stdout", |
67 | 119 | "output_type": "stream", |
68 | 120 | "text": [ |
69 | | - "--2021-05-31 06:44:37-- https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz\n", |
70 | | - "Resolving github.com (github.com)... 192.30.255.113\n", |
71 | | - "Connecting to github.com (github.com)|192.30.255.113|:443... connected.\n", |
| 121 | + "--2021-07-16 08:57:35-- https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz\n", |
| 122 | + "Resolving github.com (github.com)... 140.82.121.4\n", |
| 123 | + "Connecting to github.com (github.com)|140.82.121.4|:443... connected.\n", |
72 | 124 | "HTTP request sent, awaiting response... 301 Moved Permanently\n", |
73 | 125 | "Location: https://github.com/srhrshr/torchDatasets/raw/master/dbpedia_csv.tar.gz [following]\n", |
74 | | - "--2021-05-31 06:44:37-- https://github.com/srhrshr/torchDatasets/raw/master/dbpedia_csv.tar.gz\n", |
| 126 | + "--2021-07-16 08:57:35-- https://github.com/srhrshr/torchDatasets/raw/master/dbpedia_csv.tar.gz\n", |
75 | 127 | "Reusing existing connection to github.com:443.\n", |
76 | 128 | "HTTP request sent, awaiting response... 302 Found\n", |
77 | 129 | "Location: https://raw.githubusercontent.com/srhrshr/torchDatasets/master/dbpedia_csv.tar.gz [following]\n", |
78 | | - "--2021-05-31 06:44:37-- https://raw.githubusercontent.com/srhrshr/torchDatasets/master/dbpedia_csv.tar.gz\n", |
| 130 | + "--2021-07-16 08:57:35-- https://raw.githubusercontent.com/srhrshr/torchDatasets/master/dbpedia_csv.tar.gz\n", |
79 | 131 | "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", |
80 | 132 | "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", |
81 | 133 | "HTTP request sent, awaiting response... 200 OK\n", |
82 | 134 | "Length: 68431223 (65M) [application/octet-stream]\n", |
83 | | - "Saving to: ‘DATAPATH/dbpedia_csv.tar.gz.1’\n", |
| 135 | + "Saving to: ‘DATAPATH/dbpedia_csv.tar.gz’\n", |
84 | 136 | "\n", |
85 | | - "dbpedia_csv.tar.gz. 100%[===================>] 65.26M 124MB/s in 0.5s \n", |
| 137 | + "dbpedia_csv.tar.gz 100%[===================>] 65.26M 206MB/s in 0.3s \n", |
86 | 138 | "\n", |
87 | | - "2021-05-31 06:44:38 (124 MB/s) - ‘DATAPATH/dbpedia_csv.tar.gz.1’ saved [68431223/68431223]\n", |
| 139 | + "2021-07-16 08:57:42 (206 MB/s) - ‘DATAPATH/dbpedia_csv.tar.gz’ saved [68431223/68431223]\n", |
88 | 140 | "\n", |
89 | 141 | "dbpedia_csv/\n", |
90 | 142 | "dbpedia_csv/test.csv\n", |
91 | 143 | "dbpedia_csv/classes.txt\n", |
92 | 144 | "dbpedia_csv/train.csv\n", |
93 | 145 | "dbpedia_csv/readme.txt\n", |
94 | | - "total 328M\n", |
95 | | - "drwxr-xr-x 3 root root 4.0K May 31 06:44 .\n", |
96 | | - "drwxr-xr-x 1 root root 4.0K May 31 06:40 ..\n", |
| 146 | + "total 66M\n", |
| 147 | + "drwxr-xr-x 3 root root 4.0K Jul 16 08:57 .\n", |
| 148 | + "drwxr-xr-x 1 root root 4.0K Jul 16 08:57 ..\n", |
97 | 149 | "drwxrwxr-x 2 1000 1000 4.0K Mar 29 2015 dbpedia_csv\n", |
98 | | - "-rw-r--r-- 1 root root 66M May 31 06:40 dbpedia_csv.tar.gz\n", |
99 | | - "-rw-r--r-- 1 root root 66M May 31 06:44 dbpedia_csv.tar.gz.1\n", |
100 | | - "-rw-r--r-- 1 root root 22M May 31 06:41 dbpedia_test.csv\n", |
101 | | - "-rw-r--r-- 1 root root 175M May 31 06:41 dbpedia_train.csv\n" |
| 150 | + "-rw-r--r-- 1 root root 66M Jul 16 08:57 dbpedia_csv.tar.gz\n" |
102 | 151 | ] |
103 | 152 | } |
104 | 153 | ], |
|
139 | 188 | }, |
140 | 189 | { |
141 | 190 | "cell_type": "code", |
142 | | - "execution_count": 4, |
| 191 | + "execution_count": 5, |
143 | 192 | "metadata": { |
144 | 193 | "colab": { |
145 | 194 | "base_uri": "https://localhost:8080/" |
146 | 195 | }, |
147 | 196 | "id": "lMoRw3oQb62I", |
148 | | - "outputId": "3a263fa1-1940-42d3-d1e6-aa59c5fb6f09" |
| 197 | + "outputId": "744d1cb7-4966-4db1-b176-c2020975ed94" |
149 | 198 | }, |
150 | 199 | "outputs": [ |
151 | 200 | { |
|
169 | 218 | }, |
170 | 219 | { |
171 | 220 | "cell_type": "code", |
172 | | - "execution_count": 5, |
| 221 | + "execution_count": 6, |
173 | 222 | "metadata": { |
174 | 223 | "colab": { |
175 | 224 | "base_uri": "https://localhost:8080/", |
176 | | - "height": 195 |
| 225 | + "height": 204 |
177 | 226 | }, |
178 | 227 | "id": "gaz226vXb62W", |
179 | | - "outputId": "d1877377-b282-4038-9f87-3589f6621597" |
| 228 | + "outputId": "a7e5ab41-732e-4a94-def6-5e62124d6bd5" |
180 | 229 | }, |
181 | 230 | "outputs": [ |
182 | 231 | { |
|
257 | 306 | "[5 rows x 4 columns]" |
258 | 307 | ] |
259 | 308 | }, |
260 | | - "execution_count": 5, |
| 309 | + "execution_count": 6, |
261 | 310 | "metadata": { |
262 | 311 | "tags": [] |
263 | 312 | }, |
|
291 | 340 | }, |
292 | 341 | { |
293 | 342 | "cell_type": "code", |
294 | | - "execution_count": 6, |
| 343 | + "execution_count": 7, |
295 | 344 | "metadata": { |
296 | 345 | "colab": { |
297 | 346 | "base_uri": "https://localhost:8080/" |
298 | 347 | }, |
299 | 348 | "id": "si7VC_Rub62a", |
300 | | - "outputId": "9acc53e2-c417-478f-e0a2-61b9ee57cadc" |
| 349 | + "outputId": "a1f7d406-0e9c-4adf-eaee-fc09572f27bf" |
301 | 350 | }, |
302 | 351 | "outputs": [ |
303 | 352 | { |
304 | 353 | "data": { |
305 | 354 | "text/plain": [ |
306 | | - "Building 40000\n", |
307 | | - "Animal 40000\n", |
308 | | - "Village 40000\n", |
309 | | - "OfficeHolder 40000\n", |
| 355 | + "Athlete 40000\n", |
310 | 356 | "MeanOfTransportation 40000\n", |
| 357 | + "Film 40000\n", |
| 358 | + "Artist 40000\n", |
| 359 | + "Building 40000\n", |
311 | 360 | "Company 40000\n", |
312 | | - "Athlete 40000\n", |
313 | | - "NaturalPlace 40000\n", |
314 | 361 | "Plant 40000\n", |
| 362 | + "Album 40000\n", |
| 363 | + "NaturalPlace 40000\n", |
| 364 | + "Village 40000\n", |
315 | 365 | "EducationalInstitution 40000\n", |
| 366 | + "Animal 40000\n", |
316 | 367 | "WrittenWork 40000\n", |
317 | | - "Album 40000\n", |
318 | | - "Artist 40000\n", |
319 | | - "Film 40000\n", |
| 368 | + "OfficeHolder 40000\n", |
320 | 369 | "Name: class_name, dtype: int64" |
321 | 370 | ] |
322 | 371 | }, |
323 | | - "execution_count": 6, |
| 372 | + "execution_count": 7, |
324 | 373 | "metadata": { |
325 | 374 | "tags": [] |
326 | 375 | }, |
|
333 | 382 | }, |
334 | 383 | { |
335 | 384 | "cell_type": "code", |
336 | | - "execution_count": 7, |
| 385 | + "execution_count": 8, |
337 | 386 | "metadata": { |
338 | 387 | "id": "Sn-3kIqMb62d" |
339 | 388 | }, |
|
371 | 420 | }, |
372 | 421 | { |
373 | 422 | "cell_type": "code", |
374 | | - "execution_count": 8, |
| 423 | + "execution_count": 9, |
375 | 424 | "metadata": { |
376 | 425 | "colab": { |
377 | 426 | "base_uri": "https://localhost:8080/" |
378 | 427 | }, |
379 | 428 | "id": "r_DRvdFcb62m", |
380 | | - "outputId": "59a687ed-359a-4c95-b26f-77fcc084ad8b" |
| 429 | + "outputId": "d3fc1348-fcb2-4f50-c090-067e5ca66301" |
381 | 430 | }, |
382 | 431 | "outputs": [ |
383 | 432 | { |
384 | 433 | "name": "stdout", |
385 | 434 | "output_type": "stream", |
386 | 435 | "text": [ |
387 | | - "CPU times: user 4.31 s, sys: 196 ms, total: 4.5 s\n", |
388 | | - "Wall time: 4.5 s\n" |
| 436 | + "CPU times: user 4.38 s, sys: 193 ms, total: 4.57 s\n", |
| 437 | + "Wall time: 4.63 s\n" |
389 | 438 | ] |
390 | 439 | } |
391 | 440 | ], |
|
398 | 447 | }, |
399 | 448 | { |
400 | 449 | "cell_type": "code", |
401 | | - "execution_count": 9, |
| 450 | + "execution_count": 10, |
402 | 451 | "metadata": { |
403 | 452 | "id": "imMZ9-Bkb62t" |
404 | 453 | }, |
|
423 | 472 | }, |
424 | 473 | { |
425 | 474 | "cell_type": "code", |
426 | | - "execution_count": 10, |
| 475 | + "execution_count": 11, |
427 | 476 | "metadata": { |
428 | 477 | "colab": { |
429 | 478 | "base_uri": "https://localhost:8080/" |
430 | 479 | }, |
431 | 480 | "id": "a-H1wouCb62x", |
432 | | - "outputId": "1d4d5272-adc8-4ed9-e6a1-002e79b4d147" |
| 481 | + "outputId": "3d7c130a-fd3b-472c-8585-2e965017763f" |
433 | 482 | }, |
434 | 483 | "outputs": [ |
435 | 484 | { |
436 | 485 | "name": "stdout", |
437 | 486 | "output_type": "stream", |
438 | 487 | "text": [ |
439 | | - "CPU times: user 1h 4s, sys: 13.6 s, total: 1h 18s\n", |
440 | | - "Wall time: 30min 41s\n" |
| 488 | + "CPU times: user 1h 3min 10s, sys: 12.8 s, total: 1h 3min 23s\n", |
| 489 | + "Wall time: 32min 17s\n" |
441 | 490 | ] |
442 | 491 | } |
443 | 492 | ], |
|
456 | 505 | }, |
457 | 506 | { |
458 | 507 | "cell_type": "code", |
459 | | - "execution_count": 11, |
| 508 | + "execution_count": 12, |
460 | 509 | "metadata": { |
461 | 510 | "colab": { |
462 | 511 | "base_uri": "https://localhost:8080/" |
463 | 512 | }, |
464 | 513 | "id": "sAyN3ZDbQFq-", |
465 | | - "outputId": "6f861f1f-a174-495c-97eb-db149fa73766" |
| 514 | + "outputId": "13acbc62-48d9-469c-dfb1-d3e5446b8530" |
466 | 515 | }, |
467 | 516 | "outputs": [ |
468 | 517 | { |
469 | 518 | "name": "stdout", |
470 | 519 | "output_type": "stream", |
471 | 520 | "text": [ |
472 | | - "Test Samples: 70000 Precision@1 : 94.0343 Recall@1 : 94.0343\n", |
473 | | - "Test Samples: 70000 Precision@2 : 48.4336 Recall@2 : 96.8671\n", |
474 | | - "Test Samples: 70000 Precision@3 : 32.3905 Recall@3 : 97.1714\n", |
475 | | - "Test Samples: 70000 Precision@4 : 24.6318 Recall@4 : 98.5271\n", |
476 | | - "Test Samples: 70000 Precision@5 : 19.8137 Recall@5 : 99.0686\n" |
| 521 | + "Test Samples: 70000 Precision@1 : 92.2486 Recall@1 : 92.2486\n", |
| 522 | + "Test Samples: 70000 Precision@2 : 48.5014 Recall@2 : 97.0029\n", |
| 523 | + "Test Samples: 70000 Precision@3 : 32.5619 Recall@3 : 97.6857\n", |
| 524 | + "Test Samples: 70000 Precision@4 : 24.4968 Recall@4 : 97.9871\n", |
| 525 | + "Test Samples: 70000 Precision@5 : 19.6420 Recall@5 : 98.2100\n" |
477 | 526 | ] |
478 | 527 | } |
479 | 528 | ], |
|
0 commit comments