Skip to content

Commit 9dd67ba

Browse files
fixed print statements and comments in ch3 nb6
1 parent b1c14ff commit 9dd67ba

File tree

1 file changed

+94
-92
lines changed

1 file changed

+94
-92
lines changed

Ch3/06_Training_embeddings_using_gensim.ipynb

Lines changed: 94 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
"execution_count": 1,
1717
"metadata": {
1818
"ExecuteTime": {
19-
"end_time": "2021-04-03T08:59:14.632353Z",
20-
"start_time": "2021-04-03T08:59:14.188951Z"
19+
"end_time": "2021-04-05T21:26:40.863650Z",
20+
"start_time": "2021-04-05T21:26:40.339123Z"
2121
},
2222
"colab": {},
2323
"colab_type": "code",
@@ -35,8 +35,8 @@
3535
"execution_count": 2,
3636
"metadata": {
3737
"ExecuteTime": {
38-
"end_time": "2021-04-03T08:59:14.664303Z",
39-
"start_time": "2021-04-03T08:59:14.633352Z"
38+
"end_time": "2021-04-05T21:26:40.894143Z",
39+
"start_time": "2021-04-05T21:26:40.865114Z"
4040
},
4141
"colab": {},
4242
"colab_type": "code",
@@ -70,8 +70,8 @@
7070
"execution_count": 3,
7171
"metadata": {
7272
"ExecuteTime": {
73-
"end_time": "2021-04-03T08:59:14.679318Z",
74-
"start_time": "2021-04-03T08:59:14.665306Z"
73+
"end_time": "2021-04-05T21:26:56.724662Z",
74+
"start_time": "2021-04-05T21:26:56.712651Z"
7575
},
7676
"colab": {
7777
"base_uri": "https://localhost:8080/",
@@ -88,31 +88,31 @@
8888
"text": [
8989
"Word2Vec(vocab=6, size=100, alpha=0.025)\n",
9090
"['dog', 'bites', 'man', 'eats', 'meat', 'food']\n",
91-
"[-1.1890394e-04 -2.7612262e-04 3.0016506e-03 3.3397041e-03\n",
92-
" 2.6973987e-03 2.5381467e-03 -4.4830954e-03 -3.8807455e-03\n",
93-
" -2.7481976e-03 -3.5091466e-03 -1.0663099e-03 3.8600836e-03\n",
94-
" -1.8223912e-03 -1.8985753e-03 2.5068773e-03 5.8603484e-05\n",
95-
" -1.8388843e-03 3.4894156e-03 -1.9410843e-03 1.9663016e-03\n",
96-
" -1.9262581e-04 -1.8321032e-04 4.6618818e-03 2.0332152e-03\n",
97-
" -5.5621512e-04 -5.0049595e-04 4.4950778e-03 -2.3708560e-03\n",
98-
" -4.1250056e-03 -8.1713696e-04 -1.5846886e-03 2.6569276e-03\n",
99-
" -2.4425923e-03 3.3881937e-03 4.8663849e-03 -3.1806210e-03\n",
100-
" 6.0354080e-04 2.6283797e-03 3.2367259e-03 -4.4542220e-03\n",
101-
" -4.3623694e-03 -4.9372590e-03 3.1183651e-03 2.6437298e-03\n",
102-
" -3.1073038e-03 7.5010926e-04 3.5182503e-03 -2.6689377e-03\n",
103-
" 4.2944783e-03 1.2430353e-03 2.1388694e-03 1.5726388e-03\n",
104-
" -3.4201301e-03 -3.5607379e-03 3.4647183e-03 -9.6110179e-04\n",
105-
" -2.5040556e-03 -9.6717122e-04 1.0441509e-03 -3.4992509e-03\n",
106-
" -9.8467432e-04 2.5085383e-03 3.4381317e-03 -8.5586461e-04\n",
107-
" -4.3379996e-04 2.0993554e-03 -3.3381197e-03 3.6710135e-03\n",
108-
" 2.4826424e-03 7.7588746e-04 -3.6549675e-03 2.5771847e-03\n",
109-
" -3.9825556e-03 -6.0248183e-04 -5.7223073e-04 -1.7433831e-03\n",
110-
" -1.0604414e-03 -2.1816064e-03 -4.6085631e-03 2.3315020e-03\n",
111-
" 2.3816996e-03 1.9949675e-03 -4.0842607e-03 -2.8094815e-04\n",
112-
" -4.2685810e-03 -1.3998528e-03 1.7278946e-03 -2.2190765e-03\n",
113-
" -2.3720833e-04 -4.0732473e-03 -5.0638389e-04 -2.4232429e-03\n",
114-
" -1.9645202e-03 -2.8262585e-03 7.5944123e-04 1.1781134e-03\n",
115-
" 4.9539114e-04 -1.1337005e-03 -3.3781745e-03 1.0580849e-03]\n"
91+
"[-3.1667745e-03 2.5268614e-03 -4.9504861e-03 2.3797194e-03\n",
92+
" -3.3511904e-03 1.7659335e-03 -9.6838089e-04 3.6862001e-03\n",
93+
" 3.3760078e-03 -1.1944126e-03 -4.7475514e-03 -4.6677454e-03\n",
94+
" 4.7231275e-03 2.1875298e-03 4.9989321e-03 -4.7024325e-04\n",
95+
" 4.6936749e-03 4.5417100e-03 -4.8383311e-03 4.5522186e-03\n",
96+
" 9.4010920e-04 -2.8778350e-03 -2.3938445e-03 7.6240452e-04\n",
97+
" 2.8537741e-05 -1.0585956e-03 1.5203804e-03 1.1994856e-04\n",
98+
" 4.3881699e-03 3.5755127e-04 1.9964906e-03 -3.3893189e-03\n",
99+
" 2.5362791e-03 -3.8559963e-03 -4.6814438e-03 -1.0485576e-03\n",
100+
" 1.9576577e-03 -5.4296525e-04 2.5505766e-03 1.4563937e-03\n",
101+
" 1.1214090e-03 3.1200200e-03 3.5230191e-03 4.4931062e-03\n",
102+
" -5.5389071e-04 1.6268899e-03 -4.6736463e-03 -1.9612674e-04\n",
103+
" 1.5486709e-03 -3.5581242e-03 1.5163666e-03 2.2859944e-03\n",
104+
" -3.5728619e-03 -3.5505979e-03 7.8282715e-04 -4.8093311e-03\n",
105+
" -3.1324120e-03 -3.6213300e-03 -1.4478542e-03 3.4006054e-03\n",
106+
" 2.2276146e-03 -4.1698264e-03 -3.6997625e-03 -4.1264743e-03\n",
107+
" -4.9103238e-03 -2.2635974e-03 -3.9036905e-03 3.8846405e-03\n",
108+
" -7.9726276e-05 -2.0692295e-03 -3.0645117e-04 -3.0288144e-03\n",
109+
" -3.4682599e-03 -3.1768843e-03 -1.1148058e-03 -2.8012963e-03\n",
110+
" -6.5973290e-04 -2.3705217e-03 4.3961490e-03 3.2166531e-03\n",
111+
" 3.6933657e-04 -6.2054797e-04 2.0661615e-04 3.7390803e-04\n",
112+
" -3.5061471e-03 3.6587315e-03 2.1328868e-03 -2.5964181e-03\n",
113+
" 4.3381471e-03 4.0168604e-03 1.8054987e-03 -1.2192487e-03\n",
114+
" 1.5615283e-03 -1.8635839e-03 2.9529419e-03 -3.3825964e-03\n",
115+
" -3.2592549e-03 -4.7523994e-04 -5.3210353e-04 -9.8173530e-04]\n"
116116
]
117117
}
118118
],
@@ -133,8 +133,8 @@
133133
"execution_count": 4,
134134
"metadata": {
135135
"ExecuteTime": {
136-
"end_time": "2021-04-03T08:59:14.694928Z",
137-
"start_time": "2021-04-03T08:59:14.680319Z"
136+
"end_time": "2021-04-05T21:26:57.420196Z",
137+
"start_time": "2021-04-05T21:26:57.417193Z"
138138
},
139139
"colab": {
140140
"base_uri": "https://localhost:8080/",
@@ -149,8 +149,8 @@
149149
"name": "stdout",
150150
"output_type": "stream",
151151
"text": [
152-
"Similarity between eats and bites: -0.13728619\n",
153-
"Similarity between eats and man: -0.19164583\n"
152+
"Similarity between eats and bites: -0.09852024\n",
153+
"Similarity between eats and man: -0.17088428\n"
154154
]
155155
}
156156
],
@@ -175,8 +175,8 @@
175175
"execution_count": 5,
176176
"metadata": {
177177
"ExecuteTime": {
178-
"end_time": "2021-04-03T08:59:14.710944Z",
179-
"start_time": "2021-04-03T08:59:14.695930Z"
178+
"end_time": "2021-04-05T21:26:59.635831Z",
179+
"start_time": "2021-04-05T21:26:59.621818Z"
180180
},
181181
"colab": {
182182
"base_uri": "https://localhost:8080/",
@@ -190,11 +190,11 @@
190190
{
191191
"data": {
192192
"text/plain": [
193-
"[('man', 0.12813393771648407),\n",
194-
" ('dog', 0.11004816740751266),\n",
195-
" ('food', 0.005883853882551193),\n",
196-
" ('bites', -0.056721072643995285),\n",
197-
" ('eats', -0.09314743429422379)]"
193+
"[('bites', 0.1353721022605896),\n",
194+
" ('man', 0.1094527617096901),\n",
195+
" ('food', -0.02215239405632019),\n",
196+
" ('dog', -0.1444159597158432),\n",
197+
" ('eats', -0.16309654712677002)]"
198198
]
199199
},
200200
"execution_count": 5,
@@ -212,8 +212,8 @@
212212
"execution_count": 6,
213213
"metadata": {
214214
"ExecuteTime": {
215-
"end_time": "2021-04-03T08:59:14.726958Z",
216-
"start_time": "2021-04-03T08:59:14.711944Z"
215+
"end_time": "2021-04-05T21:26:59.855822Z",
216+
"start_time": "2021-04-05T21:26:59.841810Z"
217217
},
218218
"colab": {
219219
"base_uri": "https://localhost:8080/",
@@ -257,8 +257,8 @@
257257
"execution_count": 7,
258258
"metadata": {
259259
"ExecuteTime": {
260-
"end_time": "2021-04-03T08:59:14.742937Z",
261-
"start_time": "2021-04-03T08:59:14.727959Z"
260+
"end_time": "2021-04-05T21:27:00.517046Z",
261+
"start_time": "2021-04-05T21:27:00.508038Z"
262262
},
263263
"colab": {
264264
"base_uri": "https://localhost:8080/",
@@ -275,31 +275,31 @@
275275
"text": [
276276
"Word2Vec(vocab=6, size=100, alpha=0.025)\n",
277277
"['dog', 'bites', 'man', 'eats', 'meat', 'food']\n",
278-
"[-1.1890394e-04 -2.7612262e-04 3.0016506e-03 3.3397041e-03\n",
279-
" 2.6973987e-03 2.5381467e-03 -4.4830954e-03 -3.8807455e-03\n",
280-
" -2.7481976e-03 -3.5091466e-03 -1.0663099e-03 3.8600836e-03\n",
281-
" -1.8223912e-03 -1.8985753e-03 2.5068773e-03 5.8603484e-05\n",
282-
" -1.8388843e-03 3.4894156e-03 -1.9410843e-03 1.9663016e-03\n",
283-
" -1.9262581e-04 -1.8321032e-04 4.6618818e-03 2.0332152e-03\n",
284-
" -5.5621512e-04 -5.0049595e-04 4.4950778e-03 -2.3708560e-03\n",
285-
" -4.1250056e-03 -8.1713696e-04 -1.5846886e-03 2.6569276e-03\n",
286-
" -2.4425923e-03 3.3881937e-03 4.8663849e-03 -3.1806210e-03\n",
287-
" 6.0354080e-04 2.6283797e-03 3.2367259e-03 -4.4542220e-03\n",
288-
" -4.3623694e-03 -4.9372590e-03 3.1183651e-03 2.6437298e-03\n",
289-
" -3.1073038e-03 7.5010926e-04 3.5182503e-03 -2.6689377e-03\n",
290-
" 4.2944783e-03 1.2430353e-03 2.1388694e-03 1.5726388e-03\n",
291-
" -3.4201301e-03 -3.5607379e-03 3.4647183e-03 -9.6110179e-04\n",
292-
" -2.5040556e-03 -9.6717122e-04 1.0441509e-03 -3.4992509e-03\n",
293-
" -9.8467432e-04 2.5085383e-03 3.4381317e-03 -8.5586461e-04\n",
294-
" -4.3379996e-04 2.0993554e-03 -3.3381197e-03 3.6710135e-03\n",
295-
" 2.4826424e-03 7.7588746e-04 -3.6549675e-03 2.5771847e-03\n",
296-
" -3.9825556e-03 -6.0248183e-04 -5.7223073e-04 -1.7433831e-03\n",
297-
" -1.0604414e-03 -2.1816064e-03 -4.6085631e-03 2.3315020e-03\n",
298-
" 2.3816996e-03 1.9949675e-03 -4.0842607e-03 -2.8094815e-04\n",
299-
" -4.2685810e-03 -1.3998528e-03 1.7278946e-03 -2.2190765e-03\n",
300-
" -2.3720833e-04 -4.0732473e-03 -5.0638389e-04 -2.4232429e-03\n",
301-
" -1.9645202e-03 -2.8262585e-03 7.5944123e-04 1.1781134e-03\n",
302-
" 4.9539114e-04 -1.1337005e-03 -3.3781745e-03 1.0580849e-03]\n"
278+
"[-3.1667745e-03 2.5268614e-03 -4.9504861e-03 2.3797194e-03\n",
279+
" -3.3511904e-03 1.7659335e-03 -9.6838089e-04 3.6862001e-03\n",
280+
" 3.3760078e-03 -1.1944126e-03 -4.7475514e-03 -4.6677454e-03\n",
281+
" 4.7231275e-03 2.1875298e-03 4.9989321e-03 -4.7024325e-04\n",
282+
" 4.6936749e-03 4.5417100e-03 -4.8383311e-03 4.5522186e-03\n",
283+
" 9.4010920e-04 -2.8778350e-03 -2.3938445e-03 7.6240452e-04\n",
284+
" 2.8537741e-05 -1.0585956e-03 1.5203804e-03 1.1994856e-04\n",
285+
" 4.3881699e-03 3.5755127e-04 1.9964906e-03 -3.3893189e-03\n",
286+
" 2.5362791e-03 -3.8559963e-03 -4.6814438e-03 -1.0485576e-03\n",
287+
" 1.9576577e-03 -5.4296525e-04 2.5505766e-03 1.4563937e-03\n",
288+
" 1.1214090e-03 3.1200200e-03 3.5230191e-03 4.4931062e-03\n",
289+
" -5.5389071e-04 1.6268899e-03 -4.6736463e-03 -1.9612674e-04\n",
290+
" 1.5486709e-03 -3.5581242e-03 1.5163666e-03 2.2859944e-03\n",
291+
" -3.5728619e-03 -3.5505979e-03 7.8282715e-04 -4.8093311e-03\n",
292+
" -3.1324120e-03 -3.6213300e-03 -1.4478542e-03 3.4006054e-03\n",
293+
" 2.2276146e-03 -4.1698264e-03 -3.6997625e-03 -4.1264743e-03\n",
294+
" -4.9103238e-03 -2.2635974e-03 -3.9036905e-03 3.8846405e-03\n",
295+
" -7.9726276e-05 -2.0692295e-03 -3.0645117e-04 -3.0288144e-03\n",
296+
" -3.4682599e-03 -3.1768843e-03 -1.1148058e-03 -2.8012963e-03\n",
297+
" -6.5973290e-04 -2.3705217e-03 4.3961490e-03 3.2166531e-03\n",
298+
" 3.6933657e-04 -6.2054797e-04 2.0661615e-04 3.7390803e-04\n",
299+
" -3.5061471e-03 3.6587315e-03 2.1328868e-03 -2.5964181e-03\n",
300+
" 4.3381471e-03 4.0168604e-03 1.8054987e-03 -1.2192487e-03\n",
301+
" 1.5615283e-03 -1.8635839e-03 2.9529419e-03 -3.3825964e-03\n",
302+
" -3.2592549e-03 -4.7523994e-04 -5.3210353e-04 -9.8173530e-04]\n"
303303
]
304304
}
305305
],
@@ -320,8 +320,8 @@
320320
"execution_count": 8,
321321
"metadata": {
322322
"ExecuteTime": {
323-
"end_time": "2021-04-03T08:59:14.758953Z",
324-
"start_time": "2021-04-03T08:59:14.743938Z"
323+
"end_time": "2021-04-05T21:27:02.660747Z",
324+
"start_time": "2021-04-05T21:27:02.642866Z"
325325
},
326326
"colab": {
327327
"base_uri": "https://localhost:8080/",
@@ -336,8 +336,8 @@
336336
"name": "stdout",
337337
"output_type": "stream",
338338
"text": [
339-
"Similarity between eats and bites: -0.13728109\n",
340-
"Similarity between eats and man: -0.19165389\n"
339+
"Similarity between eats and bites: -0.09852936\n",
340+
"Similarity between eats and man: -0.17089055\n"
341341
]
342342
}
343343
],
@@ -362,8 +362,8 @@
362362
"execution_count": 9,
363363
"metadata": {
364364
"ExecuteTime": {
365-
"end_time": "2021-04-03T08:59:14.774064Z",
366-
"start_time": "2021-04-03T08:59:14.759954Z"
365+
"end_time": "2021-04-05T21:27:03.419546Z",
366+
"start_time": "2021-04-05T21:27:03.414541Z"
367367
},
368368
"colab": {
369369
"base_uri": "https://localhost:8080/",
@@ -377,11 +377,11 @@
377377
{
378378
"data": {
379379
"text/plain": [
380-
"[('man', 0.12813392281532288),\n",
381-
" ('dog', 0.11004817485809326),\n",
382-
" ('food', 0.005883842706680298),\n",
383-
" ('bites', -0.056721076369285583),\n",
384-
" ('eats', -0.09321994334459305)]"
380+
"[('bites', 0.1353721022605896),\n",
381+
" ('man', 0.10945276916027069),\n",
382+
" ('food', -0.022152386605739594),\n",
383+
" ('dog', -0.1444159746170044),\n",
384+
" ('eats', -0.16317100822925568)]"
385385
]
386386
},
387387
"execution_count": 9,
@@ -399,8 +399,8 @@
399399
"execution_count": 10,
400400
"metadata": {
401401
"ExecuteTime": {
402-
"end_time": "2021-04-03T08:59:14.790080Z",
403-
"start_time": "2021-04-03T08:59:14.775066Z"
402+
"end_time": "2021-04-05T21:27:03.973454Z",
403+
"start_time": "2021-04-05T21:27:03.950433Z"
404404
},
405405
"colab": {
406406
"base_uri": "https://localhost:8080/",
@@ -448,19 +448,20 @@
448448
},
449449
{
450450
"cell_type": "code",
451-
"execution_count": 11,
451+
"execution_count": 12,
452452
"metadata": {
453453
"ExecuteTime": {
454-
"end_time": "2021-04-03T08:59:14.806094Z",
455-
"start_time": "2021-04-03T08:59:14.791080Z"
454+
"end_time": "2021-04-05T21:27:58.596845Z",
455+
"start_time": "2021-04-05T21:27:58.585833Z"
456456
}
457457
},
458458
"outputs": [
459459
{
460460
"name": "stdout",
461461
"output_type": "stream",
462462
"text": [
463-
"file already exists, skipping download\n"
463+
"file already exists, skipping download\n",
464+
"File at: data/en/enwiki-latest-pages-articles-multistream14.xml-p13159683p14324602.bz2\n"
464465
]
465466
}
466467
],
@@ -483,7 +484,6 @@
483484
" if token:\n",
484485
" params = { 'id' : id, 'confirm' : token }\n",
485486
" response = session.get(URL, params = params, stream = True)\n",
486-
" print(len(response.content))\n",
487487
"\n",
488488
" save_response_content(response, destination) \n",
489489
"\n",
@@ -505,7 +505,9 @@
505505
"if not os.path.exists(file_name):\n",
506506
" download_file_from_google_drive(file_id, file_name)\n",
507507
"else:\n",
508-
" print(\"file already exists, skipping download\")"
508+
" print(\"file already exists, skipping download\")\n",
509+
"\n",
510+
"print(f\"File at: {file_name}\")"
509511
]
510512
},
511513
{
@@ -549,8 +551,8 @@
549551
"#if you get a memory error executing the lines above\n",
550552
"#comment the lines out and uncomment the lines below. \n",
551553
"#loading will be slower, but stable.\n",
552-
"wiki = WikiCorpus(file_name, processes=4, lemmatize=False, dictionary={})\n",
553-
"sentences = list(wiki.get_texts())\n",
554+
"# wiki = WikiCorpus(file_name, processes=4, lemmatize=False, dictionary={})\n",
555+
"# sentences = list(wiki.get_texts())\n",
554556
"\n",
555557
"#if you still get a memory error, try setting processes to 1 or 2 and then run it again."
556558
]

0 commit comments

Comments
 (0)