Skip to content

Commit 9eefbaa

Browse files
Merge pull request #66 from jatinpapreja/master
Modifications for Windows support in Chapters 4, 5, 6, 10 and 11
2 parents 54babcc + 77d21b9 commit 9eefbaa

15 files changed

+6,657 additions and −6,239 deletions (lines changed)

Ch10/01_BioBERT_Demo.ipynb

Lines changed: 1914 additions & 1897 deletions
Large diffs are not rendered by default.

Ch10/02_LexNLP.ipynb

Lines changed: 126 additions & 438 deletions
Large diffs are not rendered by default.

Ch10/03_FinBERT.ipynb

Lines changed: 1545 additions & 1486 deletions
Large diffs are not rendered by default.

Ch11/01_AutoML_Demo.ipynb

Lines changed: 318 additions & 330 deletions
Large diffs are not rendered by default.

Ch4/05_DeepNN_Example.ipynb

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -77,7 +77,7 @@
7777
"except ModuleNotFoundError:\n",
7878
" \n",
7979
" if not os.path.exists(os.getcwd()+'\\\\Data\\\\glove.6B'):\n",
80-
" os.makdir(os.getcwd()+'\\\\Data\\\\glove.6B')\n",
80+
" os.mkdir(os.getcwd()+'\\\\Data\\\\glove.6B')\n",
8181
" \n",
8282
" url='http://nlp.stanford.edu/data/glove.6B.zip' \n",
8383
" path=os.getcwd()+'\\Data' \n",
@@ -549,7 +549,7 @@
549549
"name": "python",
550550
"nbconvert_exporter": "python",
551551
"pygments_lexer": "ipython3",
552-
"version": "3.7.0"
552+
"version": "3.7.4"
553553
}
554554
},
555555
"nbformat": 4,

Ch5/01_KPE.ipynb

Lines changed: 291 additions & 105 deletions
Large diffs are not rendered by default.

Ch5/02_NERTraining.ipynb

Lines changed: 46 additions & 45 deletions
Original file line number · Diff line number · Diff line change
@@ -22,7 +22,7 @@
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": 0,
25+
"execution_count": 1,
2626
"metadata": {
2727
"colab": {},
2828
"colab_type": "code",
@@ -37,7 +37,9 @@
3737
"from pprint import pprint\n",
3838
"from sklearn.metrics import f1_score,classification_report\n",
3939
"from sklearn.pipeline import Pipeline\n",
40-
"import string\n"
40+
"import string\n",
41+
"import warnings\n",
42+
"warnings.filterwarnings('ignore')"
4143
]
4244
},
4345
{
@@ -52,7 +54,7 @@
5254
},
5355
{
5456
"cell_type": "code",
55-
"execution_count": 0,
57+
"execution_count": 2,
5658
"metadata": {
5759
"colab": {},
5860
"colab_type": "code",
@@ -79,12 +81,12 @@
7981
" words.append(word)\n",
8082
" tags.append(tag)\n",
8183
" fh.close()\n",
82-
" return myoutput\n"
84+
" return myoutput"
8385
]
8486
},
8587
{
8688
"cell_type": "code",
87-
"execution_count": 0,
89+
"execution_count": 3,
8890
"metadata": {
8991
"colab": {},
9092
"colab_type": "code",
@@ -150,7 +152,7 @@
150152
" wordfeats[\"nextNextTag\"] = sen_tags[i + 2][1]\n",
151153
" #That is it! You can add whatever you want!\n",
152154
" feats.append(wordfeats)\n",
153-
" return feats\n"
155+
" return feats"
154156
]
155157
},
156158
{
@@ -165,7 +167,7 @@
165167
},
166168
{
167169
"cell_type": "code",
168-
"execution_count": 0,
170+
"execution_count": 4,
169171
"metadata": {
170172
"colab": {},
171173
"colab_type": "code",
@@ -195,7 +197,7 @@
195197
},
196198
{
197199
"cell_type": "code",
198-
"execution_count": 0,
200+
"execution_count": 5,
199201
"metadata": {
200202
"colab": {},
201203
"colab_type": "code",
@@ -217,7 +219,7 @@
217219
" print(metrics.flat_f1_score(Y_dev, y_pred,average='weighted', labels=labels))\n",
218220
" print(metrics.flat_classification_report(Y_dev, y_pred, labels=sorted_labels, digits=3))\n",
219221
" #print(metrics.sequence_accuracy_score(Y_dev, y_pred))\n",
220-
" get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)\n"
222+
" get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)"
221223
]
222224
},
223225
{
@@ -232,7 +234,7 @@
232234
},
233235
{
234236
"cell_type": "code",
235-
"execution_count": 0,
237+
"execution_count": 6,
236238
"metadata": {
237239
"colab": {},
238240
"colab_type": "code",
@@ -258,12 +260,12 @@
258260
" cell = \"%{0}.0f\".format(columnwidth) % cm[i, j]\n",
259261
" sum = sum + int(cell)\n",
260262
" print(cell, end=\" \")\n",
261-
" print(sum) #Prints the total number of instances per cat at the end.\n"
263+
" print(sum) #Prints the total number of instances per cat at the end."
262264
]
263265
},
264266
{
265267
"cell_type": "code",
266-
"execution_count": 0,
268+
"execution_count": 7,
267269
"metadata": {
268270
"colab": {},
269271
"colab_type": "code",
@@ -278,7 +280,7 @@
278280
" for yseq_true, yseq_pred in zip(y_true, y_pred):\n",
279281
" trues.extend(yseq_true)\n",
280282
" preds.extend(yseq_pred)\n",
281-
" print_cm(confusion_matrix(trues,preds,labels),labels)\n"
283+
" print_cm(confusion_matrix(trues,preds,labels),labels)"
282284
]
283285
},
284286
{
@@ -293,7 +295,7 @@
293295
},
294296
{
295297
"cell_type": "code",
296-
"execution_count": 0,
298+
"execution_count": 8,
297299
"metadata": {
298300
"colab": {},
299301
"colab_type": "code",
@@ -307,19 +309,21 @@
307309
"text": [
308310
"Training a Sequence classification model with CRF\n",
309311
"0.9255103670420659\n",
310-
" precision recall f1-score support\n",
312+
" precision recall f1-score support\n",
311313
"\n",
312-
" O 0.973 0.981 0.977 38323\n",
313-
" B-LOC 0.694 0.765 0.728 1668\n",
314-
" I-LOC 0.738 0.482 0.584 257\n",
315-
" B-MISC 0.648 0.309 0.419 702\n",
316-
" I-MISC 0.626 0.505 0.559 216\n",
317-
" B-ORG 0.670 0.561 0.611 1661\n",
318-
" I-ORG 0.551 0.704 0.618 835\n",
319-
" B-PER 0.773 0.766 0.769 1617\n",
320-
" I-PER 0.819 0.886 0.851 1156\n",
314+
" O 0.973 0.981 0.977 38323\n",
315+
" B-LOC 0.694 0.765 0.728 1668\n",
316+
" I-LOC 0.738 0.482 0.584 257\n",
317+
" B-MISC 0.648 0.309 0.419 702\n",
318+
" I-MISC 0.626 0.505 0.559 216\n",
319+
" B-ORG 0.670 0.561 0.611 1661\n",
320+
" I-ORG 0.551 0.704 0.618 835\n",
321+
" B-PER 0.773 0.766 0.769 1617\n",
322+
" I-PER 0.819 0.886 0.851 1156\n",
321323
"\n",
322-
"avg / total 0.926 0.928 0.926 46435\n",
324+
" accuracy 0.928 46435\n",
325+
" macro avg 0.721 0.662 0.679 46435\n",
326+
"weighted avg 0.926 0.928 0.926 46435\n",
323327
"\n",
324328
"\n",
325329
"\n",
@@ -338,10 +342,18 @@
338342
}
339343
],
340344
"source": [
341-
"\n",
342345
"def main():\n",
343-
" train_path = 'conlldata/train.txt'\n",
344-
" test_path = 'conlldata/test.txt'\n",
346+
" \n",
347+
" try:\n",
348+
" from google.colab import files\n",
349+
" uploaded = files.upload()\n",
350+
" # files are present in Data/conlldata\n",
351+
" train_path = 'train.txt'\n",
352+
" test_path = 'test.txt'\n",
353+
" except:\n",
354+
" train_path = 'Data/conlldata/train.txt'\n",
355+
" test_path = 'Data/conlldata/test.txt'\n",
356+
" \n",
345357
" conll_train = load__data_conll(train_path)\n",
346358
" conll_dev = load__data_conll(test_path)\n",
347359
" \n",
@@ -352,7 +364,7 @@
352364
" print(\"Done with sequence model\")\n",
353365
"\n",
354366
"if __name__==\"__main__\":\n",
355-
" main()\n"
367+
" main()"
356368
]
357369
},
358370
{
@@ -364,17 +376,6 @@
364376
"source": [
365377
"This is pretty good. We already have a model which has an F-score of 92%!!!"
366378
]
367-
},
368-
{
369-
"cell_type": "code",
370-
"execution_count": 0,
371-
"metadata": {
372-
"colab": {},
373-
"colab_type": "code",
374-
"id": "reO-wnZocnB2"
375-
},
376-
"outputs": [],
377-
"source": []
378379
}
379380
],
380381
"metadata": {
@@ -383,21 +384,21 @@
383384
"provenance": []
384385
},
385386
"kernelspec": {
386-
"display_name": "Python 2",
387+
"display_name": "Python 3",
387388
"language": "python",
388-
"name": "python2"
389+
"name": "python3"
389390
},
390391
"language_info": {
391392
"codemirror_mode": {
392393
"name": "ipython",
393-
"version": 2
394+
"version": 3
394395
},
395396
"file_extension": ".py",
396397
"mimetype": "text/x-python",
397398
"name": "python",
398399
"nbconvert_exporter": "python",
399-
"pygments_lexer": "ipython2",
400-
"version": "2.7.17"
400+
"pygments_lexer": "ipython3",
401+
"version": "3.7.4"
401402
}
402403
},
403404
"nbformat": 4,

0 commit comments

Comments (0)