4 changes: 3 additions & 1 deletion .gitignore
@@ -1,8 +1,10 @@
data/
experiments/
data/rotowire/
*.txt
__pycache__/
.ipynb_checkpoints/
*.pyc
*.bz2
*.tar
*.DS_Store
*.idea
6 changes: 6 additions & 0 deletions finetuning/make_hugging_face_dataset.py
@@ -0,0 +1,6 @@
from datasets import load_dataset

def parse_hugging_face_dataset():
    # The dataset identifier must be a string; a bare GEM/sportsett_basketball is a NameError.
    hf_data = load_dataset("GEM/sportsett_basketball")
    with open('../data/hugging_face/train_input.txt', 'w') as f:
        # f.write expects a string, not a DatasetDict; writing one serialized
        # training example per line is an assumption about the intent here.
        for example in hf_data['train']:
            f.write(str(example) + '\n')
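For context, a minimal inspection sketch (not part of the diff), assuming GEM/sportsett_basketball exposes the usual train/validation/test splits:

from datasets import load_dataset

# load_dataset returns a DatasetDict keyed by split name, which is why the
# helper above iterates hf_data['train'].
hf_data = load_dataset("GEM/sportsett_basketball")
print(hf_data)              # DatasetDict listing its splits and row counts
print(hf_data['train'][0])  # a single example as a plain Python dict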
5 changes: 1 addition & 4 deletions onmt/modules/self_attention.py
@@ -4,7 +4,6 @@
It's actually the same module, with more or less flexibility at times,
and a more flexible use of the mask (different mask per element of the batch)
"""
from torch._jit_internal import weak_module, weak_script_method
from torch.nn.init import constant_
from torch.nn.parameter import Parameter
from torch.nn.init import xavier_uniform_
@@ -14,7 +13,6 @@
import torch


@weak_module
class MultiHeadSelfAttention(torch.nn.Module):
"""
if glu_depth is not zero, we use GatedLinear layers instead of regular layers.
@@ -59,7 +57,6 @@ def _reset_parameters(self):
constant_(self.in_proj_bias, 0.)
constant_(self.out_proj.bias, 0.)

@weak_script_method
def forward(self, input, attn_mask=None):
"""
Inputs of forward function
@@ -76,7 +73,7 @@ def forward(self, input, attn_mask=None):

# self-attention
q, k, v = F.linear(input, self.in_proj_weight, self.in_proj_bias).chunk(3, dim=-1)
q *= self.scaling
q = q * self.scaling

# Cut q, k, v in num_heads part
q = q.contiguous().view(seq_len, bsz * self.num_heads, self.head_dim).transpose(0, 1)
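Two things change in this file: the @weak_module / @weak_script_method decorators come from the private torch._jit_internal API that later PyTorch releases dropped, and the in-place q *= self.scaling becomes an out-of-place multiply. The latter matters because chunk returns views, and recent PyTorch forbids in-place writes to views produced by multi-output ops when autograd is involved. A minimal repro sketch, assuming a recent PyTorch (illustrative only):

import torch
import torch.nn.functional as F

x = torch.randn(4, 6)
w = torch.randn(9, 6, requires_grad=True)
q, k, v = F.linear(x, w).chunk(3, dim=-1)

# q *= 0.5  # RuntimeError in recent PyTorch: the output of a multi-view op
#           # (SplitBackward) may not be modified in place
q = q * 0.5  # out-of-place multiply allocates a fresh tensor and keeps autograd happy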
2 changes: 1 addition & 1 deletion onmt/translate/beam_search.py
@@ -187,7 +187,7 @@ def advance(self, log_probs, attn, attn_key):
torch.mul(self.topk_scores, length_penalty, out=self.topk_log_probs)

# Resolve beam origin and map to batch index flat representation.
torch.div(self.topk_ids, vocab_size, out=self._batch_index)
self._batch_index = torch.div(self.topk_ids, vocab_size).int()
self._batch_index += self._beam_offset[:_B].unsqueeze(1)
self.select_indices = self._batch_index.view(_B * self.beam_size)
self.topk_ids.fmod_(vocab_size) # resolve true word ids
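The reason for this change: on newer PyTorch, torch.div on integer tensors performs true division and returns floats, and the out= variant with an integer output tensor errors out, so the beam origin is now computed as a true division truncated back to int. A worked sketch of the index arithmetic, with made-up numbers:

import torch

vocab_size = 10
topk_ids = torch.tensor([3, 12, 27])  # flat indices over (beam, vocab)

batch_index = torch.div(topk_ids, vocab_size).int()  # tensor([0, 1, 2]) -> originating beam
word_ids = topk_ids.fmod(vocab_size)                 # tensor([3, 2, 7]) -> true word ids

# On PyTorch >= 1.8 the more explicit spelling would be
# torch.div(topk_ids, vocab_size, rounding_mode='floor'); the diff's truncating
# .int() cast matches floor division for non-negative indices.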
137 changes: 69 additions & 68 deletions requirements.txt
@@ -1,68 +1,69 @@
attrs==19.3.0
backcall==0.1.0
bleach==3.3.0
certifi==2019.11.28
cffi==1.13.2
chardet==3.0.4
ConfigArgParse==0.14.0
cycler==0.10.0
decorator==4.4.1
defusedxml==0.6.0
entrypoints==0.3
idna==2.8
importlib-metadata==1.3.0
ipykernel==5.1.3
ipython==7.10.2
ipython-genutils==0.2.0
ipywidgets==7.5.1
jedi==0.15.1
Jinja2==2.11.3
json5==0.8.5
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==5.3.4
jupyter-console==6.0.0
jupyter-core==4.6.1
jupyterlab==1.2.4
jupyterlab-server==1.0.6
kiwisolver==1.1.0
lab==5.1
MarkupSafe==1.1.1
matplotlib==3.1.2
mistune==0.8.4
more-itertools==8.0.2
nbconvert==5.6.1
nbformat==4.4.0
notebook==6.1.5
numpy==1.17.4
pandocfilters==1.4.2
parso==0.5.2
pexpect==4.7.0
pickleshare==0.7.5
prometheus-client==0.7.1
prompt-toolkit==2.0.10
ptyprocess==0.6.0
pycparser==2.19
Pygments==2.7.4
pyparsing==2.4.5
pyrsistent==0.15.6
python-dateutil==2.8.1
PyYAML==5.4
pyzmq==18.1.1
qtconsole==4.6.0
requests==2.22.0
Send2Trash==1.5.0
simplejson==3.17.0
six==1.13.0
terminado==0.8.3
testpath==0.4.4
torch==1.1.0
torchtext==0.4.0
tornado==6.0.3
tqdm==4.40.2
traitlets==4.3.3
urllib3==1.25.7
wcwidth==0.1.7
webencodings==0.5.1
widgetsnbextension==3.5.1
zipp==0.6.0
attrs
backcall
bleach
certifi
cffi
chardet
ConfigArgParse
cycler
datasets
decorator
defusedxml
entrypoints
idna
importlib-metadata
ipykernel
ipython
ipython-genutils
ipywidgets
jedi
Jinja2
json5
jsonschema
jupyter
jupyter-client
jupyter-console
jupyter-core
jupyterlab
jupyterlab-server
kiwisolver
lab
MarkupSafe
matplotlib
mistune
more-itertools
nbconvert
nbformat
notebook
numpy
pandocfilters
parso
pexpect
pickleshare
prometheus-client
prompt-toolkit
ptyprocess
pycparser
Pygments
pyparsing
pyrsistent
python-dateutil
PyYAML
pyzmq
qtconsole
requests
Send2Trash
simplejson
six
terminado
testpath
torch
torchtext
tornado
tqdm
traitlets
urllib3
wcwidth
webencodings
widgetsnbextension
zipp
6 changes: 3 additions & 3 deletions train.cfg
@@ -56,12 +56,12 @@ data: experiments/exp-1/data/data # path to datafile from preprocess.py
save_model: experiments/exp-1/models/model # path to store checkpoints
log_file: experiments/exp-1/train-log.txt

report_every: 50 # log current loss every X steps
report_every: 10 # log current loss every X steps
save_checkpoint_steps: 500 # save a cp every X steps


# Gpu related:
gpu_ranks: [0] # ids of gpus to use
gpu_ranks: [] # ids of gpus to use
world_size: 1 # total number of distributed processes
gpu_backend: nccl # type of torch distributed backend
gpu_verbose_level: 0
@@ -77,7 +77,7 @@ normalization: sents
accum_count: [2] # Update weights every X batches
accum_steps: [0] # steps at which accum counts value changes
valid_steps: 500 # run models on validation set every X steps
train_steps: 30000
train_steps: 100
optim: adam
max_grad_norm: 5
dropout: .5