From fb365f46520087dbd3bd2a1d725ed69ce1e60fd4 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 12:30:30 +0530 Subject: [PATCH 01/18] Removes old commits --- .gitignore | 3 ++- requirements.txt | 8 ++++---- train.cfg | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index ccd3731..9e82841 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ +data/ experiments/ -data/rotowire/ *.txt __pycache__/ .ipynb_checkpoints/ *.pyc *.bz2 *.tar +*.DS_Store diff --git a/requirements.txt b/requirements.txt index 4810274..fede69e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ bleach==3.3.0 certifi==2019.11.28 cffi==1.13.2 chardet==3.0.4 -ConfigArgParse==0.14.0 +ConfigArgParse cycler==0.10.0 decorator==4.4.1 defusedxml==0.6.0 @@ -17,7 +17,7 @@ ipython-genutils==0.2.0 ipywidgets==7.5.1 jedi==0.15.1 Jinja2==2.11.3 -json5==0.8.5 +json5 jsonschema==3.2.0 jupyter==1.0.0 jupyter-client==5.3.4 @@ -47,7 +47,7 @@ Pygments==2.7.4 pyparsing==2.4.5 pyrsistent==0.15.6 python-dateutil==2.8.1 -PyYAML==5.4 +PyYAML pyzmq==18.1.1 qtconsole==4.6.0 requests==2.22.0 @@ -56,7 +56,7 @@ simplejson==3.17.0 six==1.13.0 terminado==0.8.3 testpath==0.4.4 -torch==1.1.0 +torch torchtext==0.4.0 tornado==6.0.3 tqdm==4.40.2 diff --git a/train.cfg b/train.cfg index 91f494f..b341fb3 100644 --- a/train.cfg +++ b/train.cfg @@ -61,7 +61,7 @@ save_checkpoint_steps: 500 # save a cp every X steps # Gpu related: -gpu_ranks: [0] # ids of gpus to use +gpu_ranks: [] # ids of gpus to use world_size: 1 # total number of distributed processes gpu_backend: nccl # type of torch distributed backend gpu_verbose_level: 0 @@ -77,7 +77,7 @@ normalization: sents accum_count: [2] # Update weights every X batches accum_steps: [0] # steps at which accum counts value changes valid_steps: 500 # run models on validation set every X steps -train_steps: 30000 +train_steps: 100 optim: adam max_grad_norm: 5 dropout: .5 From be6d39d77220dd68693ac8e3072c6635cd19c6ff Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 19:40:57 +0530 Subject: [PATCH 02/18] updates deps --- .gitignore | 1 + requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9e82841..01d2b43 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ __pycache__/ *.bz2 *.tar *.DS_Store +*.idea diff --git a/requirements.txt b/requirements.txt index fede69e..4fc90a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,7 +30,7 @@ lab==5.1 MarkupSafe==1.1.1 matplotlib==3.1.2 mistune==0.8.4 -more-itertools==8.0.2 +more-itertools nbconvert==5.6.1 nbformat==4.4.0 notebook==6.1.5 @@ -48,7 +48,7 @@ pyparsing==2.4.5 pyrsistent==0.15.6 python-dateutil==2.8.1 PyYAML -pyzmq==18.1.1 +pyzmq qtconsole==4.6.0 requests==2.22.0 Send2Trash==1.5.0 From 63fe915cc22d71bbfc23a6ba396a714b28134e25 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 19:43:02 +0530 Subject: [PATCH 03/18] updates --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4fc90a7..713d3b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ attrs==19.3.0 backcall==0.1.0 bleach==3.3.0 certifi==2019.11.28 -cffi==1.13.2 +cffi chardet==3.0.4 ConfigArgParse cycler==0.10.0 @@ -40,7 +40,7 @@ parso==0.5.2 pexpect==4.7.0 pickleshare==0.7.5 prometheus-client==0.7.1 -prompt-toolkit==2.0.10 +prompt-toolkit ptyprocess==0.6.0 pycparser==2.19 Pygments==2.7.4 From 
d8d09fa90e6abea335735320488a85d4976d9027 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 19:44:21 +0530 Subject: [PATCH 04/18] u --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 713d3b8..0c6677e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,7 +34,7 @@ more-itertools nbconvert==5.6.1 nbformat==4.4.0 notebook==6.1.5 -numpy==1.17.4 +numpy pandocfilters==1.4.2 parso==0.5.2 pexpect==4.7.0 From 2c8a8a9f9d277cd3b316b10ce09dfd19ac71cf20 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 19:47:45 +0530 Subject: [PATCH 05/18] remove all pinned dep --- requirements.txt | 118 +++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0c6677e..1d621c0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,68 +1,68 @@ -attrs==19.3.0 -backcall==0.1.0 -bleach==3.3.0 -certifi==2019.11.28 +attrs +backcall +bleach +certifi cffi -chardet==3.0.4 +chardet ConfigArgParse -cycler==0.10.0 -decorator==4.4.1 -defusedxml==0.6.0 -entrypoints==0.3 -idna==2.8 -importlib-metadata==1.3.0 -ipykernel==5.1.3 -ipython==7.10.2 -ipython-genutils==0.2.0 -ipywidgets==7.5.1 -jedi==0.15.1 -Jinja2==2.11.3 +cycler +decorator +defusedxml +entrypoints +idna +importlib-metadata +ipykernel +ipython +ipython-genutils +ipywidgets +jedi +Jinja2 json5 -jsonschema==3.2.0 -jupyter==1.0.0 -jupyter-client==5.3.4 -jupyter-console==6.0.0 -jupyter-core==4.6.1 -jupyterlab==1.2.4 -jupyterlab-server==1.0.6 -kiwisolver==1.1.0 -lab==5.1 -MarkupSafe==1.1.1 -matplotlib==3.1.2 -mistune==0.8.4 +jsonschema +jupyter +jupyter-client +jupyter-console +jupyter-core +jupyterlab +jupyterlab-server +kiwisolver +lab +MarkupSafe +matplotlib +mistune more-itertools -nbconvert==5.6.1 -nbformat==4.4.0 -notebook==6.1.5 +nbconvert +nbformat +notebook numpy -pandocfilters==1.4.2 -parso==0.5.2 -pexpect==4.7.0 -pickleshare==0.7.5 -prometheus-client==0.7.1 +pandocfilters +parso +pexpect +pickleshare +prometheus-client prompt-toolkit -ptyprocess==0.6.0 -pycparser==2.19 -Pygments==2.7.4 -pyparsing==2.4.5 -pyrsistent==0.15.6 -python-dateutil==2.8.1 +ptyprocess +pycparser +Pygments +pyparsing +pyrsistent +python-dateutil PyYAML pyzmq -qtconsole==4.6.0 -requests==2.22.0 -Send2Trash==1.5.0 -simplejson==3.17.0 -six==1.13.0 -terminado==0.8.3 -testpath==0.4.4 +qtconsole +requests +Send2Trash +simplejson +six +terminado +testpath torch -torchtext==0.4.0 -tornado==6.0.3 -tqdm==4.40.2 -traitlets==4.3.3 -urllib3==1.25.7 -wcwidth==0.1.7 -webencodings==0.5.1 -widgetsnbextension==3.5.1 -zipp==0.6.0 +torchtext +tornado +tqdm +traitlets +urllib3 +wcwidth +webencodings +widgetsnbextension +zipp From 3d0ec0d1f1281f4c55368fa6d0cbfdbf32fcb6c0 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 20:12:01 +0530 Subject: [PATCH 06/18] updates --- onmt/modules/self_attention.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/onmt/modules/self_attention.py b/onmt/modules/self_attention.py index 4c46b6d..3d2692e 100644 --- a/onmt/modules/self_attention.py +++ b/onmt/modules/self_attention.py @@ -4,7 +4,6 @@ It's actually the same module, with more or less flewibility at times, and a more flexible use of the mask (different mask per element of the batch) """ -from torch._jit_internal import weak_module, weak_script_method from torch.nn.init import constant_ from torch.nn.parameter import Parameter from torch.nn.init import xavier_uniform_ @@ -14,7 +13,6 @@ 
import torch -@weak_module class MultiHeadSelfAttention(torch.nn.Module): """ if glu_depth is not zero, we use GatedLinear layers instead of regular layers. @@ -59,7 +57,6 @@ def _reset_parameters(self): constant_(self.in_proj_bias, 0.) constant_(self.out_proj.bias, 0.) - @weak_script_method def forward(self, input, attn_mask=None): """ Inputs of forward function From e62ea67e115bd83c69b03b7a68d5c7522b2e0c08 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 20:17:42 +0530 Subject: [PATCH 07/18] Updates self attention --- onmt/modules/self_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onmt/modules/self_attention.py b/onmt/modules/self_attention.py index 3d2692e..e0cf625 100644 --- a/onmt/modules/self_attention.py +++ b/onmt/modules/self_attention.py @@ -73,7 +73,7 @@ def forward(self, input, attn_mask=None): # self-attention q, k, v = F.linear(input, self.in_proj_weight, self.in_proj_bias).chunk(3, dim=-1) - q *= self.scaling + q = q * self.scaling # Cut q, k, v in num_heads part q = q.contiguous().view(seq_len, bsz * self.num_heads, self.head_dim).transpose(0, 1) From 09d93189a3a7ea1ec2cc7687880e0c7455621703 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sat, 25 Nov 2023 21:34:16 +0530 Subject: [PATCH 08/18] log loss more often --- train.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.cfg b/train.cfg index b341fb3..b516fed 100644 --- a/train.cfg +++ b/train.cfg @@ -56,7 +56,7 @@ data: experiments/exp-1/data/data # path to datafile from preprocess.py save_model: experiments/exp-1/models/model # path to store checkpoints log_file: experiments/exp-1/train-log.txt -report_every: 50 # log current loss every X steps +report_every: 10 # log current loss every X steps save_checkpoint_steps: 500 # save a cp every X steps From 86ffe215b206200bdd0880c15b8b32afd6dd79d4 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:14:53 -0500 Subject: [PATCH 09/18] print statements --- onmt/translate/beam_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index d0ea981..f62c99c 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,6 +187,7 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. + print(type(self.topk_ids), type(self._batch_index)) torch.div(self.topk_ids, vocab_size, out=self._batch_index) self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) From f3e4cf494ecdc4fb8e64e15427245ba195e29525 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:16:35 -0500 Subject: [PATCH 10/18] fix --- onmt/translate/beam_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index f62c99c..57952a2 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,7 +187,7 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. 
- print(type(self.topk_ids), type(self._batch_index)) + print(self.topk_ids.type(), self._batch_index.type()) torch.div(self.topk_ids, vocab_size, out=self._batch_index) self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) From 67b18b901c172ac52a166942735cf3502f8ebf81 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:19:19 -0500 Subject: [PATCH 11/18] Fix --- onmt/translate/beam_search.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index 57952a2..4a4d714 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,8 +187,7 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. - print(self.topk_ids.type(), self._batch_index.type()) - torch.div(self.topk_ids, vocab_size, out=self._batch_index) + self._batch_index = torch.div(self.topk_ids, vocab_size, out=self._batch_index.float()) self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) self.topk_ids.fmod_(vocab_size) # resolve true word ids From 9c63c666452ec2c4f9d869ea1da61b058e531bc3 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:20:55 -0500 Subject: [PATCH 12/18] stab --- onmt/translate/beam_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index 4a4d714..f905ff6 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,9 +187,10 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. - self._batch_index = torch.div(self.topk_ids, vocab_size, out=self._batch_index.float()) + torch.div(self.topk_ids, vocab_size, out=self._batch_index.float()) self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) + print(self.select_indices.type()) self.topk_ids.fmod_(vocab_size) # resolve true word ids # Append last prediction. From 353fb16a66e2e7f2f70b4fe63f1775db8c3a4c21 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:23:28 -0500 Subject: [PATCH 13/18] debug --- onmt/translate/beam_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index f905ff6..5f07979 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -190,10 +190,11 @@ def advance(self, log_probs, attn, attn_key): torch.div(self.topk_ids, vocab_size, out=self._batch_index.float()) self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) - print(self.select_indices.type()) self.topk_ids.fmod_(vocab_size) # resolve true word ids # Append last prediction. 
+ print(self.alive_seq.shape) + print(self.select_indices) self.alive_seq = torch.cat( [self.alive_seq.index_select(0, self.select_indices), self.topk_ids.view(_B * self.beam_size, 1)], -1) From 630eedc5ed3f219ae9f96cdb3bc86d4f16ad9cfc Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:27:30 -0500 Subject: [PATCH 14/18] fix --- onmt/translate/beam_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index 5f07979..7eb8eb1 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,7 +187,7 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. - torch.div(self.topk_ids, vocab_size, out=self._batch_index.float()) + self._batch_index = torch.div(self.topk_ids, vocab_size) self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) self.topk_ids.fmod_(vocab_size) # resolve true word ids From adec64dc38cb6bbff638c928e86f58ed06247c1f Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:29:28 -0500 Subject: [PATCH 15/18] cast to int --- onmt/translate/beam_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index 7eb8eb1..792f530 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,7 +187,7 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. - self._batch_index = torch.div(self.topk_ids, vocab_size) + self._batch_index = torch.div(self.topk_ids, vocab_size).int() self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) self.topk_ids.fmod_(vocab_size) # resolve true word ids From 1422be6700aff7ca6825feb1fb51badd678887a8 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:31:00 -0500 Subject: [PATCH 16/18] stab --- onmt/translate/beam_search.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index 792f530..dbce56b 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,14 +187,13 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. + print(self.topk_ids) self._batch_index = torch.div(self.topk_ids, vocab_size).int() self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) self.topk_ids.fmod_(vocab_size) # resolve true word ids # Append last prediction. 
- print(self.alive_seq.shape) - print(self.select_indices) self.alive_seq = torch.cat( [self.alive_seq.index_select(0, self.select_indices), self.topk_ids.view(_B * self.beam_size, 1)], -1) From b90b2d85c617dcbe19eee736f69036ecbbb0d4f7 Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Sun, 3 Dec 2023 23:31:55 -0500 Subject: [PATCH 17/18] removes print --- onmt/translate/beam_search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onmt/translate/beam_search.py b/onmt/translate/beam_search.py index dbce56b..d9471f5 100644 --- a/onmt/translate/beam_search.py +++ b/onmt/translate/beam_search.py @@ -187,7 +187,6 @@ def advance(self, log_probs, attn, attn_key): torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs) # Resolve beam origin and map to batch index flat representation. - print(self.topk_ids) self._batch_index = torch.div(self.topk_ids, vocab_size).int() self._batch_index += self._beam_offset[:_B].unsqueeze(1) self.select_indices = self._batch_index.view(_B * self.beam_size) self.topk_ids.fmod_(vocab_size) # resolve true word ids # Append last prediction. From e420eba0e7a32d456a54a40de2c3cd3e91f312ff Mon Sep 17 00:00:00 2001 From: Kedar Thakkar Date: Mon, 4 Dec 2023 09:27:01 -0500 Subject: [PATCH 18/18] Adds HF dataset --- finetuning/make_hugging_face_dataset.py | 6 ++++++ requirements.txt | 1 + 2 files changed, 7 insertions(+) create mode 100644 finetuning/make_hugging_face_dataset.py diff --git a/finetuning/make_hugging_face_dataset.py b/finetuning/make_hugging_face_dataset.py new file mode 100644 index 0000000..ace1d22 --- /dev/null +++ b/finetuning/make_hugging_face_dataset.py @@ -0,0 +1,6 @@ +from datasets import load_dataset + +def parse_hugging_face_dataset(): + hf_data = load_dataset("GEM/sportsett_basketball") + with open('../data/hugging_face/train_input.txt', 'w') as f: + f.write(str(hf_data)) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1d621c0..1bb6045 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ cffi chardet ConfigArgParse cycler +datasets decorator defusedxml entrypoints
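
Note on PATCH 18: load_dataset returns a DatasetDict keyed by split rather than plain text, so writing it straight to train_input.txt only captures its repr. Below is a minimal sketch of a fuller export; it keeps the GEM/sportsett_basketball identifier and the ../data/hugging_face path from the patch, but the function name, per-split file layout, and JSON-lines serialisation are illustrative assumptions, not part of the patch series.

# Sketch: dump each split of the Hugging Face dataset to a JSON-lines file.
# Only the dataset identifier and base output path come from PATCH 18;
# everything else is an assumption for illustration.
import json
from pathlib import Path

from datasets import load_dataset


def export_hugging_face_dataset(out_dir: str = "../data/hugging_face") -> None:
    # Script-based GEM datasets may also need trust_remote_code=True on newer
    # versions of the datasets library.
    dataset = load_dataset("GEM/sportsett_basketball")
    out_path = Path(out_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    for split_name, split in dataset.items():
        # One JSON record per line; default=str avoids failures on non-JSON types.
        with open(out_path / f"{split_name}_input.txt", "w") as f:
            for example in split:
                f.write(json.dumps(example, default=str) + "\n")


if __name__ == "__main__":
    export_hugging_face_dataset()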