|
22 | 22 | }, |
23 | 23 | { |
24 | 24 | "cell_type": "code", |
25 | | - "execution_count": 0, |
| 25 | + "execution_count": 1, |
26 | 26 | "metadata": { |
27 | 27 | "colab": {}, |
28 | 28 | "colab_type": "code", |
|
37 | 37 | "from pprint import pprint\n", |
38 | 38 | "from sklearn.metrics import f1_score,classification_report\n", |
39 | 39 | "from sklearn.pipeline import Pipeline\n", |
40 | | - "import string\n" |
| 40 | + "import string\n", |
| 41 | + "import warnings\n", |
| 42 | + "warnings.filterwarnings('ignore')" |
41 | 43 | ] |
42 | 44 | }, |
43 | 45 | { |
|
52 | 54 | }, |
53 | 55 | { |
54 | 56 | "cell_type": "code", |
55 | | - "execution_count": 0, |
| 57 | + "execution_count": 2, |
56 | 58 | "metadata": { |
57 | 59 | "colab": {}, |
58 | 60 | "colab_type": "code", |
|
79 | 81 | " words.append(word)\n", |
80 | 82 | " tags.append(tag)\n", |
81 | 83 | " fh.close()\n", |
82 | | - " return myoutput\n" |
| 84 | + " return myoutput" |
83 | 85 | ] |
84 | 86 | }, |
85 | 87 | { |
86 | 88 | "cell_type": "code", |
87 | | - "execution_count": 0, |
| 89 | + "execution_count": 3, |
88 | 90 | "metadata": { |
89 | 91 | "colab": {}, |
90 | 92 | "colab_type": "code", |
|
150 | 152 | " wordfeats[\"nextNextTag\"] = sen_tags[i + 2][1]\n", |
151 | 153 | " #That is it! You can add whatever you want!\n", |
152 | 154 | " feats.append(wordfeats)\n", |
153 | | - " return feats\n" |
| 155 | + " return feats" |
154 | 156 | ] |
155 | 157 | }, |
156 | 158 | { |
|
165 | 167 | }, |
166 | 168 | { |
167 | 169 | "cell_type": "code", |
168 | | - "execution_count": 0, |
| 170 | + "execution_count": 4, |
169 | 171 | "metadata": { |
170 | 172 | "colab": {}, |
171 | 173 | "colab_type": "code", |
|
195 | 197 | }, |
196 | 198 | { |
197 | 199 | "cell_type": "code", |
198 | | - "execution_count": 0, |
| 200 | + "execution_count": 5, |
199 | 201 | "metadata": { |
200 | 202 | "colab": {}, |
201 | 203 | "colab_type": "code", |
|
217 | 219 | " print(metrics.flat_f1_score(Y_dev, y_pred,average='weighted', labels=labels))\n", |
218 | 220 | " print(metrics.flat_classification_report(Y_dev, y_pred, labels=sorted_labels, digits=3))\n", |
219 | 221 | " #print(metrics.sequence_accuracy_score(Y_dev, y_pred))\n", |
220 | | - " get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)\n" |
| 222 | + " get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)" |
221 | 223 | ] |
222 | 224 | }, |
223 | 225 | { |
|
232 | 234 | }, |
233 | 235 | { |
234 | 236 | "cell_type": "code", |
235 | | - "execution_count": 0, |
| 237 | + "execution_count": 6, |
236 | 238 | "metadata": { |
237 | 239 | "colab": {}, |
238 | 240 | "colab_type": "code", |
|
258 | 260 | " cell = \"%{0}.0f\".format(columnwidth) % cm[i, j]\n", |
259 | 261 | " sum = sum + int(cell)\n", |
260 | 262 | " print(cell, end=\" \")\n", |
261 | | - " print(sum) #Prints the total number of instances per cat at the end.\n" |
| 263 | + " print(sum) #Prints the total number of instances per cat at the end." |
262 | 264 | ] |
263 | 265 | }, |
264 | 266 | { |
265 | 267 | "cell_type": "code", |
266 | | - "execution_count": 0, |
| 268 | + "execution_count": 7, |
267 | 269 | "metadata": { |
268 | 270 | "colab": {}, |
269 | 271 | "colab_type": "code", |
|
278 | 280 | " for yseq_true, yseq_pred in zip(y_true, y_pred):\n", |
279 | 281 | " trues.extend(yseq_true)\n", |
280 | 282 | " preds.extend(yseq_pred)\n", |
281 | | - " print_cm(confusion_matrix(trues,preds,labels),labels)\n" |
| 283 | + " print_cm(confusion_matrix(trues,preds,labels),labels)" |
282 | 284 | ] |
283 | 285 | }, |
284 | 286 | { |
|
293 | 295 | }, |
294 | 296 | { |
295 | 297 | "cell_type": "code", |
296 | | - "execution_count": 0, |
| 298 | + "execution_count": 8, |
297 | 299 | "metadata": { |
298 | 300 | "colab": {}, |
299 | 301 | "colab_type": "code", |
|
307 | 309 | "text": [ |
308 | 310 | "Training a Sequence classification model with CRF\n", |
309 | 311 | "0.9255103670420659\n", |
310 | | - " precision recall f1-score support\n", |
| 312 | + " precision recall f1-score support\n", |
311 | 313 | "\n", |
312 | | - " O 0.973 0.981 0.977 38323\n", |
313 | | - " B-LOC 0.694 0.765 0.728 1668\n", |
314 | | - " I-LOC 0.738 0.482 0.584 257\n", |
315 | | - " B-MISC 0.648 0.309 0.419 702\n", |
316 | | - " I-MISC 0.626 0.505 0.559 216\n", |
317 | | - " B-ORG 0.670 0.561 0.611 1661\n", |
318 | | - " I-ORG 0.551 0.704 0.618 835\n", |
319 | | - " B-PER 0.773 0.766 0.769 1617\n", |
320 | | - " I-PER 0.819 0.886 0.851 1156\n", |
| 314 | + " O 0.973 0.981 0.977 38323\n", |
| 315 | + " B-LOC 0.694 0.765 0.728 1668\n", |
| 316 | + " I-LOC 0.738 0.482 0.584 257\n", |
| 317 | + " B-MISC 0.648 0.309 0.419 702\n", |
| 318 | + " I-MISC 0.626 0.505 0.559 216\n", |
| 319 | + " B-ORG 0.670 0.561 0.611 1661\n", |
| 320 | + " I-ORG 0.551 0.704 0.618 835\n", |
| 321 | + " B-PER 0.773 0.766 0.769 1617\n", |
| 322 | + " I-PER 0.819 0.886 0.851 1156\n", |
321 | 323 | "\n", |
322 | | - "avg / total 0.926 0.928 0.926 46435\n", |
| 324 | + " accuracy 0.928 46435\n", |
| 325 | + " macro avg 0.721 0.662 0.679 46435\n", |
| 326 | + "weighted avg 0.926 0.928 0.926 46435\n", |
323 | 327 | "\n", |
324 | 328 | "\n", |
325 | 329 | "\n", |
|
338 | 342 | } |
339 | 343 | ], |
340 | 344 | "source": [ |
341 | | - "\n", |
342 | 345 | "def main():\n", |
343 | | - " train_path = 'conlldata/train.txt'\n", |
344 | | - " test_path = 'conlldata/test.txt'\n", |
| 346 | + " \n", |
| 347 | + " try:\n", |
| 348 | + " from google.colab import files\n", |
| 349 | + " uploaded = files.upload()\n", |
| 350 | + " # files are present in Data/conlldata\n", |
| 351 | + " train_path = 'train.txt'\n", |
| 352 | + " test_path = 'test.txt'\n", |
| 353 | + " except:\n", |
| 354 | + " train_path = 'Data/conlldata/train.txt'\n", |
| 355 | + " test_path = 'Data/conlldata/test.txt'\n", |
| 356 | + " \n", |
345 | 357 | " conll_train = load__data_conll(train_path)\n", |
346 | 358 | " conll_dev = load__data_conll(test_path)\n", |
347 | 359 | " \n", |
|
352 | 364 | " print(\"Done with sequence model\")\n", |
353 | 365 | "\n", |
354 | 366 | "if __name__==\"__main__\":\n", |
355 | | - " main()\n" |
| 367 | + " main()" |
356 | 368 | ] |
357 | 369 | }, |
358 | 370 | { |
|
364 | 376 | "source": [ |
365 | 377 | "This is pretty good. We already have a model which has an F-score of 92%!!!" |
366 | 378 | ] |
367 | | - }, |
368 | | - { |
369 | | - "cell_type": "code", |
370 | | - "execution_count": 0, |
371 | | - "metadata": { |
372 | | - "colab": {}, |
373 | | - "colab_type": "code", |
374 | | - "id": "reO-wnZocnB2" |
375 | | - }, |
376 | | - "outputs": [], |
377 | | - "source": [] |
378 | 379 | } |
379 | 380 | ], |
380 | 381 | "metadata": { |
|
383 | 384 | "provenance": [] |
384 | 385 | }, |
385 | 386 | "kernelspec": { |
386 | | - "display_name": "Python 2", |
| 387 | + "display_name": "Python 3", |
387 | 388 | "language": "python", |
388 | | - "name": "python2" |
| 389 | + "name": "python3" |
389 | 390 | }, |
390 | 391 | "language_info": { |
391 | 392 | "codemirror_mode": { |
392 | 393 | "name": "ipython", |
393 | | - "version": 2 |
| 394 | + "version": 3 |
394 | 395 | }, |
395 | 396 | "file_extension": ".py", |
396 | 397 | "mimetype": "text/x-python", |
397 | 398 | "name": "python", |
398 | 399 | "nbconvert_exporter": "python", |
399 | | - "pygments_lexer": "ipython2", |
400 | | - "version": "2.7.17" |
| 400 | + "pygments_lexer": "ipython3", |
| 401 | + "version": "3.7.4" |
401 | 402 | } |
402 | 403 | }, |
403 | 404 | "nbformat": 4, |
|