From ed3bf258d1d097cfec8e7b2c5941b5cbaadb9b33 Mon Sep 17 00:00:00 2001
From: wanghesong
Date: Fri, 20 Jan 2023 20:15:22 +0800
Subject: [PATCH 01/25] debug BERT-pytorch\bert_pytorch\model\embedding\position.py

---
 BERT-pytorch | 1 +
 1 file changed, 1 insertion(+)
 create mode 160000 BERT-pytorch

diff --git a/BERT-pytorch b/BERT-pytorch
new file mode 160000
index 0000000..d10dc4f
--- /dev/null
+++ b/BERT-pytorch
@@ -0,0 +1 @@
+Subproject commit d10dc4f9d5a6f2ca74380f62039526eb7277c671

From 3599ade11128263a7c0ad8f160059d1f0b91af9a Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sat, 28 Jan 2023 17:39:57 +0800
Subject: [PATCH 02/25] Add comments for self.itos and self.stoi to the
 TorchVocab class in bert_pytorch/dataset/vocab.py

---
 bert_pytorch/dataset/vocab.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bert_pytorch/dataset/vocab.py b/bert_pytorch/dataset/vocab.py
index f7346a7..263ac25 100644
--- a/bert_pytorch/dataset/vocab.py
+++ b/bert_pytorch/dataset/vocab.py
@@ -33,6 +33,9 @@ def __init__(self, counter, max_size=None, min_freq=1, specials=['<pad>', '<oov>'],
                 to zero vectors; can be any function that takes in a Tensor and
                 returns a Tensor of the same size. Default: torch.Tensor.zero_
             vectors_cache: directory for cached vectors. Default: '.vector_cache'
+        Attributes:
+            self.itos: the vocabulary, i.e. the list of all tokens;
+            self.stoi: a dict mapping every token to its index in self.itos
         """
         self.freqs = counter
         counter = counter.copy()
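As a quick illustration of the itos/stoi invariant described in that comment, a minimal sketch (the token list here is invented for illustration, not taken from any real vocab):

```python
# itos: index -> token; stoi: token -> index. Each is the inverse of the other.
itos = ['<pad>', '<oov>', 'welcome', 'to', 'the', 'jungle']
stoi = {tok: i for i, tok in enumerate(itos)}
assert itos[stoi['jungle']] == 'jungle' and stoi[itos[4]] == 4
```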
From 9d54ff08d29f2ce87c58cd9df23122473e4c5bc9 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sat, 28 Jan 2023 19:32:52 +0800
Subject: [PATCH 03/25] debug bert_pytorch/dataset/vocab.py WordVocab line 130
 line.replace()

---
 bert_pytorch/dataset/vocab.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/bert_pytorch/dataset/vocab.py b/bert_pytorch/dataset/vocab.py
index f7346a7..a070ea5 100644
--- a/bert_pytorch/dataset/vocab.py
+++ b/bert_pytorch/dataset/vocab.py
@@ -33,6 +33,9 @@ def __init__(self, counter, max_size=None, min_freq=1, specials=['<pad>', '<oov>'],
                 to zero vectors; can be any function that takes in a Tensor and
                 returns a Tensor of the same size. Default: torch.Tensor.zero_
             vectors_cache: directory for cached vectors. Default: '.vector_cache'
+        Attributes:
+            self.itos: the vocabulary, i.e. the list of all tokens;
+            self.stoi: a dict mapping every token to its index in self.itos
         """
         self.freqs = counter
         counter = counter.copy()
@@ -124,13 +127,18 @@ def __init__(self, texts, max_size=None, min_freq=1):
             if isinstance(line, list):
                 words = line
             else:
-                words = line.replace("\n", "").replace("\t", "").split()
+                # The original replace() did not seem to substitute "\t" and "\n" with "", so change it as follows
+                #words = line.replace("\n", "").replace("\t", "").split()
+                words = line.replace('\\t', '').replace('\\n', '').split()

             for word in words:
                 counter[word] += 1
         super().__init__(counter, max_size=max_size, min_freq=min_freq)

     def to_seq(self, sentence, seq_len=None, with_eos=False, with_sos=False, with_len=False):
+        """Convert a sentence into the list of indices its tokens map to in self.stoi, e.g.
+        for sentence = 'Welcome to the the jungle', to_seq(sentence) returns [7, 14, 5, 5, 11]
+        """
         if isinstance(sentence, str):
             sentence = sentence.split()

@@ -153,6 +161,9 @@ def to_seq(self, sentence, seq_len=None, with_eos=False, with_sos=False, with_le
         return (seq, origin_seq_len) if with_len else seq

     def from_seq(self, seq, join=False, with_pad=False):
+        """Convert the index list returned by to_seq() back into the corresponding tokens from self.itos, e.g.
+        for seq = [7, 14, 5, 5, 11], from_seq(seq) returns ['Welcome', 'to', 'the', 'the', 'jungle']
+        """
         words = [self.itos[idx]
                  if idx < len(self.itos)
                  else "<%d>" % idx

From 6a2cae90791e8e1d7caec92b9f687b3cca1d18ba Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sat, 28 Jan 2023 19:48:36 +0800
Subject: [PATCH 04/25] debug bert_pytorch/dataset/vocab.py WordVocab line 130
 line.replace()

---
 BERT-pytorch | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/BERT-pytorch b/BERT-pytorch
index d10dc4f..9d54ff0 160000
--- a/BERT-pytorch
+++ b/BERT-pytorch
@@ -1 +1 @@
-Subproject commit d10dc4f9d5a6f2ca74380f62039526eb7277c671
+Subproject commit 9d54ff08d29f2ce87c58cd9df23122473e4c5bc9

From 3a873cd5b9a1d0484a662a71b2593b1d2ddda7d1 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sat, 28 Jan 2023 20:21:09 +0800
Subject: [PATCH 05/25] debug bert_pytorch/dataset/vocab.py WordVocab line 130
 line.replace()

---
 bert_pytorch/model/embedding/position.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/bert_pytorch/model/embedding/position.py b/bert_pytorch/model/embedding/position.py
index d55c224..5b02c78 100644
--- a/bert_pytorch/model/embedding/position.py
+++ b/bert_pytorch/model/embedding/position.py
@@ -16,7 +16,13 @@ def __init__(self, d_model, max_len=512):
         div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

         pe[:, 0::2] = torch.sin(position * div_term)
-        pe[:, 1::2] = torch.cos(position * div_term)
+
+        #pe[:, 1::2].size(-1) is less than div_term.size(-1) when d_model is an odd number
+        if pe[:, 1::2].size(-1) >= div_term.size(-1):
+            pe[:, 1::2] = torch.cos(position * div_term)
+        else:
+            cos_len = pe[:, 1::2].size(-1)
+            pe[:, 1::2] = torch.cos(position * div_term[:cos_len])

         pe = pe.unsqueeze(0)
         self.register_buffer('pe', pe)
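Patch 05's guard is easiest to see with a tiny standalone example. The sketch below (plain PyTorch, shapes chosen for illustration, not part of any patch) rebuilds the sinusoidal table with d_model = 5: there are 3 sine columns (0, 2, 4) but only 2 cosine columns (1, 3), so div_term must be truncated before filling the cosine slots.

```python
import math
import torch

def sinusoid_table(max_len=4, d_model=5):
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0, max_len).float().unsqueeze(1)
    div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
    pe[:, 0::2] = torch.sin(position * div_term)      # 3 columns when d_model == 5
    cos_len = pe[:, 1::2].size(-1)                    # 2 columns when d_model == 5
    pe[:, 1::2] = torch.cos(position * div_term[:cos_len])
    return pe

print(sinusoid_table().shape)  # torch.Size([4, 5]) -- no broadcasting error
```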
From 919adf1ff7d050bb5ab2955caee00b7f994e7e94 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sun, 29 Jan 2023 18:10:36 +0800
Subject: [PATCH 06/25] 1. Add comments to dataset.py; 2. Add data file;
 3. Revert vocab.py line 130 to the original version

---
 bert_pytorch/dataset/dataset.py | 16 ++++++++++------
 bert_pytorch/dataset/vocab.py   |  6 +++---
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/bert_pytorch/dataset/dataset.py b/bert_pytorch/dataset/dataset.py
index 7d787f3..575d58e 100644
--- a/bert_pytorch/dataset/dataset.py
+++ b/bert_pytorch/dataset/dataset.py
@@ -14,17 +14,17 @@ def __init__(self, corpus_path, vocab, seq_len, encoding="utf-8", corpus_lines=N
         self.corpus_path = corpus_path
         self.encoding = encoding

-        with open(corpus_path, "r", encoding=encoding) as f:
+        with open(corpus_path, "r", encoding=encoding) as f:
             if self.corpus_lines is None and not on_memory:
                 for _ in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines):
                     self.corpus_lines += 1

             if on_memory:
-                self.lines = [line[:-1].split("\t")
-                              for line in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines)]
+                self.lines = [line[:-1].split('\t')
+                              for line in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines)]
                 self.corpus_lines = len(self.lines)

-        if not on_memory:
+        if not on_memory:  # What are file and random_file below used for?
             self.file = open(corpus_path, "r", encoding=encoding)
             self.random_file = open(corpus_path, "r", encoding=encoding)

@@ -35,6 +35,7 @@ def __len__(self):
         return self.corpus_lines

     def __getitem__(self, item):
+        # Apply masking and padding to the sentence pair randomly selected by index item
         t1, t2, is_next_label = self.random_sent(item)
         t1_random, t1_label = self.random_word(t1)
         t2_random, t2_label = self.random_word(t2)
@@ -62,11 +63,13 @@ def __getitem__(self, item):

     def random_word(self, sentence):
         tokens = sentence.split()
-        output_label = []
+        output_label = []  # positions that are actually masked are filled with mask_index (=4), randomly replaced ones with a random index, and the rest with the token's index in the vocabulary self.vocab.stoi

         for i, token in enumerate(tokens):
             prob = random.random()
+            # BERT randomly selects 15% of the tokens for masking
             if prob < 0.15:
+                # for the randomly selected 15% of tokens, randomize once more
                 prob /= 0.15

                 # 80% randomly change token to mask token
@@ -93,6 +96,7 @@ def random_sent(self, index):
         t1, t2 = self.get_corpus_line(index)

         # output_text, label(isNotNext:0, isNext:1)
+        # with 50% probability return the original (sentence, next_sentence) pair, otherwise sample next_sentence at random
         if random.random() > 0.5:
             return t1, t2, 1
         else:
@@ -122,4 +126,4 @@ def get_random_line(self):
         for _ in range(random.randint(self.corpus_lines if self.corpus_lines < 1000 else 1000)):
             self.random_file.__next__()
         line = self.random_file.__next__()
-        return line[:-1].split("\t")[1]
+        return line[:-1].split("\t")[1]
\ No newline at end of file

diff --git a/bert_pytorch/dataset/vocab.py b/bert_pytorch/dataset/vocab.py
index a070ea5..08dbf60 100644
--- a/bert_pytorch/dataset/vocab.py
+++ b/bert_pytorch/dataset/vocab.py
@@ -127,9 +127,9 @@ def __init__(self, texts, max_size=None, min_freq=1):
             if isinstance(line, list):
                 words = line
             else:
-                # The original replace() did not seem to substitute "\t" and "\n" with "", so change it as follows
-                #words = line.replace("\n", "").replace("\t", "").split()
-                words = line.replace('\\t', '').replace('\\n', '').split()
+                # The original replace() did not seem to substitute "\t" and "\n" with "", so I changed it (please ignore this comment)
+                words = line.replace("\n", "").replace("\t", "").split()
+                #words = line.replace('\\t', '').replace('\\n', '').split()

             for word in words:
                 counter[word] += 1

From 2216e03dc1ddfcf14d31b97013aa58a0c133f2b4 Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Sun, 29 Jan 2023 18:21:11 +0800
Subject: [PATCH 07/25] Create corpus.txt

---
 data/corpus.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 data/corpus.txt

diff --git a/data/corpus.txt b/data/corpus.txt
new file mode 100644
index 0000000..ed144f5
--- /dev/null
+++ b/data/corpus.txt
@@ -0,0 +1,2 @@
+Welcome to the the jungle
+I can stay here all night
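The back-and-forth between Patch 03 and Patch 06 is easy to check interactively. In the sketch below (the tab character is inserted artificially for illustration), "\t" in source code denotes a real tab, while '\\t' is a literal backslash followed by 't', which never occurs in a normal corpus line; note also that split() with no arguments already splits on tabs and newlines, which is why the Patch 03 version appeared to work anyway:

```python
line = "Welcome to the\tthe jungle\n"

# Original code: strips the newline, but glues tab-adjacent words together.
print(line.replace("\n", "").replace("\t", "").split())
# ['Welcome', 'to', 'thethe', 'jungle']

# Patch 03 version: '\\t' and '\\n' match nothing here, yet split() still
# treats the surviving tab and newline as whitespace.
print(line.replace('\\t', '').replace('\\n', '').split())
# ['Welcome', 'to', 'the', 'the', 'jungle']
```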
From 3189be7b0ac398bd983eb62c37945569b7cccb53 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Tue, 31 Jan 2023 20:30:10 +0800
Subject: [PATCH 08/25] Add comments for the 3 kinds of embedding under
 bert_pytorch/model/embedding

---
 BERT-pytorch  |   2 +-
 test_bert.py  |  94 +++++++++++++++++++++++++++++++++++++++++++++++++
 test_bert.rar | Bin 0 -> 1522 bytes
 3 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 test_bert.py
 create mode 100644 test_bert.rar

diff --git a/BERT-pytorch b/BERT-pytorch
index 9d54ff0..919adf1 160000
--- a/BERT-pytorch
+++ b/BERT-pytorch
@@ -1 +1 @@
-Subproject commit 9d54ff08d29f2ce87c58cd9df23122473e4c5bc9
+Subproject commit 919adf1ff7d050bb5ab2955caee00b7f994e7e94

diff --git a/test_bert.py b/test_bert.py
new file mode 100644
index 0000000..3496ae0
--- /dev/null
+++ b/test_bert.py
@@ -0,0 +1,94 @@
+import argparse
+
+from torch.utils.data import DataLoader
+
+from bert_pytorch.model import BERT
+from bert_pytorch.trainer import BERTTrainer
+from bert_pytorch.dataset import BERTDataset, WordVocab
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-c", "--train_dataset", required=True,
+                        type=str, help="train dataset for train bert")
+    parser.add_argument("-t", "--test_dataset", type=str,
+                        default=None, help="test set for evaluate train set")
+    parser.add_argument("-v", "--vocab_path", required=True,
+                        type=str, help="built vocab model path with bert-vocab")
+    parser.add_argument("-o", "--output_path", required=True,
+                        type=str, help="ex)output/bert.model")
+
+    parser.add_argument("-hs", "--hidden", type=int,
+                        default=256, help="hidden size of transformer model")
+    parser.add_argument("-l", "--layers", type=int,
+                        default=8, help="number of layers")
+    parser.add_argument("-a", "--attn_heads", type=int,
+                        default=8, help="number of attention heads")
+    parser.add_argument("-s", "--seq_len", type=int,
+                        default=20, help="maximum sequence len")
+
+    parser.add_argument("-b", "--batch_size", type=int,
+                        default=64, help="number of batch_size")
+    parser.add_argument("-e", "--epochs", type=int,
+                        default=10, help="number of epochs")
+    parser.add_argument("-w", "--num_workers", type=int,
+                        default=5, help="dataloader worker size")
+
+    parser.add_argument("--with_cuda", type=bool, default=True,
+                        help="training with CUDA: true, or false")
+    parser.add_argument("--log_freq", type=int, default=10,
+                        help="printing loss every n iter: setting n")
+    parser.add_argument("--corpus_lines", type=int,
+                        default=None, help="total number of lines in corpus")
+    parser.add_argument("--cuda_devices", type=int, nargs='+',
+                        default=None, help="CUDA device ids")
+    parser.add_argument("--on_memory", type=bool, default=True,
+                        help="Loading on memory: true or false")
+
+    parser.add_argument("--lr", type=float, default=1e-3,
+                        help="learning rate of adam")
+    parser.add_argument("--adam_weight_decay", type=float,
+                        default=0.01, help="weight_decay of adam")
+    parser.add_argument("--adam_beta1", type=float,
+                        default=0.9, help="adam first beta value")
+    parser.add_argument("--adam_beta2", type=float,
+                        default=0.999, help="adam second beta value")
+
+    args = parser.parse_args()
+
+    print("Loading Vocab", args.vocab_path)
+    vocab = WordVocab.load_vocab(args.vocab_path)
+    print("Vocab Size: ", len(vocab))
+
+    print("Loading Train Dataset", args.train_dataset)
+    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.seq_len,
+                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)
+
+    print("Loading Test Dataset", args.test_dataset)
+    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.seq_len, on_memory=args.on_memory) \
+        if args.test_dataset is not None else None
+
+    print("Creating Dataloader")
+    train_data_loader = DataLoader(
+        train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
+    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
+        if test_dataset is not None else None
+
+    print("Building BERT model")
+    bert = BERT(len(vocab), hidden=args.hidden,
+                n_layers=args.layers, attn_heads=args.attn_heads)
+
+    print("Creating BERT Trainer")
+    trainer = BERTTrainer(bert, len(vocab), train_dataloader=train_data_loader, test_dataloader=test_data_loader,
+                          lr=args.lr, betas=(
+                              args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
+                          with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq)
+
+    print("Training Start")
+    for epoch in range(args.epochs):
+        trainer.train(epoch)
+        trainer.save(epoch, args.output_path)
+
+        if test_data_loader is not None:
+            trainer.test(epoch)

diff --git a/test_bert.rar b/test_bert.rar
new file mode 100644
index 0000000000000000000000000000000000000000..e27ddce05a81728654a91896aedcc081ce830e47
GIT binary patch
literal 1522
[base85 payload omitted]

literal 0
HcmV?d00001
From 8e86425e0e6e71a85246436983180075cb868f84 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Tue, 31 Jan 2023 20:54:42 +0800
Subject: [PATCH 09/25] add comment for embedding

---
 bert_pytorch/model/embedding/position.py | 4 ++--
 bert_pytorch/model/embedding/segment.py  | 5 +++++
 bert_pytorch/model/embedding/token.py    | 4 ++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/bert_pytorch/model/embedding/position.py b/bert_pytorch/model/embedding/position.py
index 5b02c78..9f3420e 100644
--- a/bert_pytorch/model/embedding/position.py
+++ b/bert_pytorch/model/embedding/position.py
@@ -12,8 +12,8 @@ def __init__(self, d_model, max_len=512):
         pe = torch.zeros(max_len, d_model).float()
         pe.require_grad = False

-        position = torch.arange(0, max_len).float().unsqueeze(1)
-        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
+        position = torch.arange(0, max_len).float().unsqueeze(1)  # numerator in the paper's encoding formula
+        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()  # denominator in the paper's encoding formula: take the log first, then exp, to reduce computation?

         pe[:, 0::2] = torch.sin(position * div_term)

diff --git a/bert_pytorch/model/embedding/segment.py b/bert_pytorch/model/embedding/segment.py
index cdf84d5..6b89e6c 100644
--- a/bert_pytorch/model/embedding/segment.py
+++ b/bert_pytorch/model/embedding/segment.py
@@ -3,4 +3,9 @@

 class SegmentEmbedding(nn.Embedding):
     def __init__(self, embed_size=512):
+        """
+        Unlike TokenEmbedding, the first argument to __init__() below is 3 rather than vocab_size,
+        because the input to an instantiated SegmentEmbedding is segment_info, a vector whose
+        elements take only the three values 0, 1 and 2, so SegmentEmbedding only needs to
+        initialize 3 vectors.
+        """
         super().__init__(3, embed_size, padding_idx=0)

diff --git a/bert_pytorch/model/embedding/token.py b/bert_pytorch/model/embedding/token.py
index 79b5187..d7a2a6c 100644
--- a/bert_pytorch/model/embedding/token.py
+++ b/bert_pytorch/model/embedding/token.py
@@ -2,5 +2,9 @@

 class TokenEmbedding(nn.Embedding):
+    """nn.Embedding is the parent class of TokenEmbedding.
+    nn.Embedding(vocab_size, embed_size) creates vocab_size vectors of dimension embed_size;
+    nn.Embedding's method forward(self, input: Tensor) -> Tensor uses the elements of input as indices and returns the corresponding vectors.
+    """
     def __init__(self, vocab_size, embed_size=512):
         super().__init__(vocab_size, embed_size, padding_idx=0)
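The lookup semantics described in these comments can be checked with plain nn.Embedding objects standing in for TokenEmbedding and SegmentEmbedding (toy sizes and invented indices, not part of the patch):

```python
import torch
import torch.nn as nn

token_emb = nn.Embedding(20, 8, padding_idx=0)    # 20 = toy vocab_size, 8 = embed_size
segment_emb = nn.Embedding(3, 8, padding_idx=0)   # only 3 rows: pad / sentence A / sentence B

token_ids = torch.tensor([[3, 7, 12, 0]])         # a batch of 1 sequence; 0 = <pad>
segment_info = torch.tensor([[1, 1, 2, 0]])       # 1 = first sentence, 2 = second sentence

x = token_emb(token_ids) + segment_emb(segment_info)  # input elements select embedding rows
print(x.shape)  # torch.Size([1, 4, 8])
```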
From 2cf41cf41bba2c3fc85e96e4e6fbede76a67a8a7 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Tue, 31 Jan 2023 23:25:22 +0800
Subject: [PATCH 10/25] Comment on the role of the mask in single.py

---
 bert_pytorch/model/attention/single.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bert_pytorch/model/attention/single.py b/bert_pytorch/model/attention/single.py
index 701d2c2..394d35c 100644
--- a/bert_pytorch/model/attention/single.py
+++ b/bert_pytorch/model/attention/single.py
@@ -15,7 +15,9 @@ def forward(self, query, key, value, mask=None, dropout=None):
                  / math.sqrt(query.size(-1))

         if mask is not None:
-            scores = scores.masked_fill(mask == 0, -1e9)
+            # The role of the mask in the transformer: in the encoder it removes the influence of padding in the sequence;
+            # in the decoder it additionally enforces the "future positions are not visible" logic. Here it is clearly the former.
+            scores = scores.masked_fill(mask == 0, -1e9)  # note that mask and scores must be broadcastable

         p_attn = F.softmax(scores, dim=-1)
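A toy demonstration of the masked_fill trick commented above (made-up shapes, no bert_pytorch imports): positions where mask == 0 receive a score of -1e9, so softmax assigns them approximately zero attention weight and padding tokens receive no attention.

```python
import math
import torch
import torch.nn.functional as F

q = k = torch.randn(1, 4, 8)                      # batch=1, seq_len=4, d_k=8
scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(q.size(-1))  # (1, 4, 4)

mask = torch.tensor([[1, 1, 1, 0]])               # last position is padding
scores = scores.masked_fill(mask == 0, -1e9)      # mask broadcasts over the query axis
p_attn = F.softmax(scores, dim=-1)
print(p_attn[0, :, -1])  # ~0 everywhere: no query attends to the pad position
```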
From 7862e1c2950ca0e86af914dfdf7c15c0c699e5f6 Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Thu, 2 Feb 2023 20:46:53 +0800
Subject: [PATCH 11/25] Create tyr.jpg

---
 img/tyr.jpg | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 img/tyr.jpg

diff --git a/img/tyr.jpg b/img/tyr.jpg
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/img/tyr.jpg
@@ -0,0 +1 @@
+

From 97c9ac693b9f215d7769226dd0043dcd60a6b03e Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Thu, 2 Feb 2023 20:53:42 +0800
Subject: [PATCH 12/25] upload images

---
 img/1.PNG | Bin 0 -> 11034 bytes
 img/2.PNG | Bin 0 -> 14312 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 img/1.PNG
 create mode 100644 img/2.PNG

diff --git a/img/1.PNG b/img/1.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..5bc31ffac9b7d62a62372b4a290fa00f7cddb15c
GIT binary patch
literal 11034
[base85 payload omitted]

literal 0
HcmV?d00001

diff --git a/img/2.PNG b/img/2.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..cbd475b48f4023be132190671143ff548cf13a6e
GIT binary patch
literal 14312
[base85 payload omitted]

literal 0
HcmV?d00001
zba_eb6T}aXHY++ry~PfRq}ST`z-)iiUBH%D5;?7f#ELU$t4YeNxd~enZ&^ntwgZb+ z61Ko0qcT&r7KtbSl6Kr(GgN9Fg+FAh`Pu6Vw@$8jiKIl=C>Lt-E%c?9nqHpteBf7E z-TtwP5kJMqL>^F&-l%i#*rEOY5;i7^-kLUzUlaG6yP}9pp+xm_T)K8Q!}Ts9bgoGz z=ND(|zE-y`bL13lHj;mNM@>CCK8(qUHj!k8hs0;D>b@H)XIH{Wk%v6(@DOR3!;_qk@0qKTHID8j z?u*W-?)Yz<{ZMx>x@CAIrQ+P_y&CAd(ir9KnN4NJf(KD9e4ph$(^;!lymK6WWxP-= zm8r1W+3pgEE_;JKUWk7|)$H%#j zj7z;LQ1X&%(|P+8EHv6^aCY`yud+X)0L{PtNHkh%v#)p@S| z0RaI(`p&a|N7(wNA-Kdi#r4uOGn!S_1uTNoU#&%CXzr8UxF=0WVuJ?2DIkc%^t0<$ z?_%oIMMKm3>X83dZfR-RpS@Z`)Y)zbxg$eRp&)M>dVCxhKz!It6?hNSz5TaiGEyoQ zy?7q2?_$lBjzb`w$4mZ4u_8Y=og6eRr)C_+$w7E8|1AvlZ)hRxk!vWR@f7i$4i@^O zCwXiS=f#@(#1!@xBi1WU@&45V$gm?sAvkKi2@nG71Y!&lf{Mq>O~)E(4?mIZKl|0j z==utbDMW~p9|!poOdAqN3I!@&_jjyRu^#ZsK+f($G9mMNA}e6gmCF1-5x&k~+$xUZ zIQ5=oDK5oAJB19S(KE|Dvv($RK3VD?&|ge&4k)m7u(;8}%{~PGv47>2MU-VY$RaB~ zr_9$7Y|xv!Fz31mkt>T6oJl-MudAyYBoKE{Tp9zNiTr~(nyC(L$43X6u}MbtRYD zVBzJAu_X$e#%3Yc<)uQcZ;!Ns(kErFzpRyRe$|pl;wl2)*Ql6@E_6Ir&rQ#GF?~tm zhU}qXEYx{-cj4>hRYNT{Wtne1Mz<993)EAv)$Izdx7xqupzf8-9HO^qDrsIfnZs0& z&aWJyQmQ%6_+dRYdEcK+8b+c9$a^e>Wo&P2LNFMZ*SOpB#*ab!{pld5$Sd^Z$rE3F zRRbgxYk4g$-GP0_@j<+D#T8u*?q5GXaM9QgiVWFkw+qXKstW#b2#0O@-fJ+ z5)#rR=BKve5ynE_!nIM6@mAx56ze=lpEP)|wlx1~^K$j=x#^Oev-kis6LDtNYNUo< zdxY7AZ!Z~a?+#AKw5;;0PZRYWoLPvK{03E>M@gyfd!KPHUy%7Ixgh^}VK}+Y&$Me9 zjzm`p_xUwZtRJ3n_FfpBkhP3OpZWdeTU;N z&)S9iVR-VYF{Na93J0mQcB($ADfKL|$L4J{bMtcd(ap}c#~=_{@b$B&;$c}W|CNnl zv`qcx)vYd0zIM(eOoAnS{Rh7PFE0s@+Ns%f$DOhHn^>t^69l zdBAEbd<{15IZ(A~h}TGXYI$}njIK#mRBW%!o%a=X*e7-GbGvGfwUn#0pjMdG>qizQ zf;V@uH6lmK$b?7I&{4p}(CV@gXgwm z4t+ICIjI2o0J{p%u&DP6o&$=*n6byN$PU13%V&de&1nVg$4l&vyVEd({s23QmUBuc z5SO3JApvT{ff}~mDI&&5YQkq>z&M)*luHE#jP;-f zDX1_dP`(CEE(q|EV3uyY_WltL_h9i|WGAdJ&?RU}tC4c+xW#e5^@iUoVG4+Coq#`a zC7hCqlNch$Adi%I1@z-?+@h)Hpg!)`#_VsJ+y*E|6KHao?jRCSusZf3RRE0A3Wyy$ zdkg7>?maNo$mqkTSg)yoAtxfhq`wZQv;&m3+`yktSd|JT3Jr>dOhAhWG88clCnqNk zKjs>vq#8H@7K5BPRQ&T4pm+W^-+%T&Ys5tY z`8{sM)N8_XKT>w9Yhz|+fCO+wDQiTD*d$|_=*0kQ^*4JJq`%@;2;4~=;CTy+Y@Yt_ zV*$2XkJcxPPm+3!Y|}1cqWv{LgVSN_0P?WUI~;+bL3o=m?=;nDCIDRWG-M|q=m=O5 zHG74ae?2V7VQ)eP?oN2AW^w{nu;HTM@pz4UZvLR}zn&;)s#@6sa`~j85*nnXak;Wb z`MzKiG>gB35TOsqpb%*4b_|5$LAci`F!!7fwYLB@bq7!_3(V>sC8u|HsC-Bc$Bkm=L+f^ON_IpO7-1qxeK#dxo`vF ze}eIq`D4a3z%nAdgfIYRYo!YNQc#zso4zAQwK-PC-eLPr|wgWVZRPj-nlp&tzgxX)rNACGMLK4C5D?Ax zy{nksZ<&LS6$6Xrl)~6wGwANQQ!ezESluD8d1sSg=>VFu9=nvNlD|vcs_0G!XH1^O z$^H-yV1*8WdQD)eTw!9;OAk7(Ii>{(-RNY4_i<>LW7LW`nUFgsE^FdE111q`RekEO z=bdg%-Wxy$OqHp8STk(${s2y@eGRJ zH-tLIg)dq%ml?Y;Ld#*ko8Csjg#I3C3U1c8kv9^mJ47+3M zzU@2-m{^_kE_lK2xkj6aqKkYsQgvi46dlEXm zu6V7CLGka2ydO8!hI*cGGAnzm2;6`G9EkDgFQj>y7htP5j+Xj$>26!jlvXX0*wO-m z3W{kW)Txzo!x$e!XZe6jl>>D;wcZ*2?a>!>mJ8i`&5r$s_@8wAu*vFfzq-=ERjBLH zd-nCb@({fzElzQ{; zgPR1(J#Jf{jJ=N`#QTNZjlX`7AC(OPJ7-7rF01PZ3cR0)%nJKVriU^WBHE|n>9VVf z&(h^;S5gje&4yAol=57p4Sxh!ogefotgDs-CvHkO8#>+6_FGf2y4tx~rJnzDj7%X$ zz(_32K`1d+Ld-!8fjdN3IWRN2rfZY@=h4rIRQzm${miSP`7Q(;5dj$vQ~Fq_Yli$} zcg-zE8CkGn#2;svYPnjI+o3k-T-ioA0DzIVC@%C8&9<$|6H=i>rpqHGMny81z% z)kK`f%V`D%k3&aTRvHM1EZC2;8tO}mTAu%N*%!rA*kSh{Z_(u>kr@^D%|ys6h@+JX z=yG*s16iL9mD=-<&qHS;tWufWUu|2!y`N9BpHz5V2lRhP`Vy!&ob5Vv6!I1P zl%zw@Kd-pH0DYkw56UE{nS$qo!Lu+<441$Xcun?TJ%pWvG$S0q)doS`y={+PPD~?y zPD!WKZ^>KzJ(_i4sFet~U&KJqW&>XpSOz-4nFUq|AMoCQMoj>B2WU`05d}ZIf8V-M bXCKS{9LUTRYsz&3#Sk@Rgi?vZy~qCtN1;(< literal 0 HcmV?d00001 From feac7c2b69081e8cba219c3edc5b3503ebec46db Mon Sep 17 00:00:00 2001 From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com> Date: Thu, 2 Feb 2023 20:54:26 +0800 Subject: [PATCH 13/25] Delete tyr.jpg --- img/tyr.jpg | 1 - 1 file changed, 1 deletion(-) delete mode 100644 img/tyr.jpg diff 
From 3187f928a02918986ca725aa88720aa1adb3197e Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Thu, 2 Feb 2023 20:55:57 +0800
Subject: [PATCH 14/25] Rename README.md to README_back.md

---
 README.md => README_back.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename README.md => README_back.md (100%)

diff --git a/README.md b/README_back.md
similarity index 100%
rename from README.md
rename to README_back.md

From 0b4c94dab2946b38ae4faaf320f1a06058b66cfd Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Thu, 2 Feb 2023 22:05:00 +0800
Subject: [PATCH 15/25] Create README.md

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..79efdf4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# BERT-pytorch source-reading notes and module-by-module debugging walkthrough
+Source code: Google AI 2018 BERT pytorch implementation\
+Debugger: wanghesong2019
+# Project background
+    The code here is genuinely beautiful and clearly structured, but the original repository only gives two short command lines; just running them made it hard for me to see clearly how the BERT model works internally, so I resolved to chew through this project's code line by line;
+    reading notes are added to the code as comments; the debugging process gets its own separate .py file, for my own reference and the reader's;
+# Overall framework
+

From 32f3c81e1c95e2ac61eb0077cde94c2c8ba537b9 Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Thu, 2 Feb 2023 22:13:54 +0800
Subject: [PATCH 16/25] Update README.md

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 79efdf4..07159b2 100644
--- a/README.md
+++ b/README.md
@@ -5,4 +5,6 @@ Source code: Google AI 2018 BERT pytorch implementation\
     The code here is genuinely beautiful and clearly structured, but the original repository only gives two short command lines; just running them made it hard for me to see clearly how the BERT model works internally, so I resolved to chew through this project's code line by line;
     reading notes are added to the code as comments; the debugging process gets its own separate .py file, for my own reference and the reader's;
 # Overall framework
-
+![BERT-pytorch framework](https://raw.githubusercontent.com/wanghesong2019/BERT-pytorch/master/img/1.PNG)
+In the bertEmbedding-loss-train pattern above, bertEmbedding is undoubtedly the most important piece, because it not only contains BERT's 3 kinds of embedding but also implements BERT's 2 pre-training objectives. Its structure is as follows:\
+![BERT-pytorch framework](https://raw.githubusercontent.com/wanghesong2019/BERT-pytorch/master/img/2.PNG)

From 9c13e0f2af37247ace6e111a48481e922a8adbbf Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Thu, 2 Feb 2023 22:39:58 +0800
Subject: [PATCH 17/25] upload vocab file

---
 data/vocab.small | Bin 0 -> 418 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 data/vocab.small

diff --git a/data/vocab.small b/data/vocab.small
new file mode 100644
index 0000000000000000000000000000000000000000..a4092d62b4ae69f3bcdf96b79810c3fa939e7fd0
GIT binary patch
literal 418
[base85 payload omitted]

literal 0
HcmV?d00001
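Patch 17's data/vocab.small is presumably the serialized vocab built by bert-vocab, most likely over data/corpus.txt from Patch 07. A rough sketch of the frequency counting behind such a vocab (a plain Counter standing in for WordVocab, assuming data/corpus.txt exists as created above):

```python
from collections import Counter

counter = Counter()
for line in open("data/corpus.txt", encoding="utf-8"):
    for word in line.replace("\n", "").replace("\t", "").split():
        counter[word] += 1

print(counter.most_common())  # [('the', 2), ('Welcome', 1), ...]
```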
From 01e9da01ce1c913c0c7f3e6ba05a0e2f07dbc85c Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sun, 5 Feb 2023 19:04:54 +0800
Subject: [PATCH 18/25] Comment the logic by which dataset.py generates
 sentence pairs

---
 bert_pytorch/dataset/dataset.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/bert_pytorch/dataset/dataset.py b/bert_pytorch/dataset/dataset.py
index 575d58e..1de3624 100644
--- a/bert_pytorch/dataset/dataset.py
+++ b/bert_pytorch/dataset/dataset.py
@@ -14,20 +14,22 @@ def __init__(self, corpus_path, vocab, seq_len, encoding="utf-8", corpus_lines=N
         self.corpus_path = corpus_path
         self.encoding = encoding

-        with open(corpus_path, "r", encoding=encoding) as f:
-            if self.corpus_lines is None and not on_memory:
+        with open(corpus_path, "r", encoding=encoding) as f:
+            # after opening the corpus, handle the following 2 cases:
+            if self.corpus_lines is None and not on_memory:  # if the corpus is not loaded into memory directly, its line count must be determined first
                 for _ in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines):
                     self.corpus_lines += 1

             if on_memory:
+                # load the entire dataset into memory
                 self.lines = [line[:-1].split('\t')
-                              for line in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines)]
-                self.corpus_lines = len(self.lines)
+                              for line in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines)]  # split each corpus line into 2 sentences at the \t character
+                self.corpus_lines = len(self.lines)  # record the number of corpus lines

-        if not on_memory:  # What are file and random_file below used for?
+        if not on_memory:
             self.file = open(corpus_path, "r", encoding=encoding)
             self.random_file = open(corpus_path, "r", encoding=encoding)
-
+            # negative samples are drawn at a staggered offset; what is this for?
             for _ in range(random.randint(self.corpus_lines if self.corpus_lines < 1000 else 1000)):
                 self.random_file.__next__()
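The question in the comment above is answered by get_random_line(): the second, randomly advanced file handle supplies the random sentence that random_sent() pairs with t1 as a next-sentence-prediction negative. A toy sketch of that sampling (an in-memory list instead of file handles; not part of the patch):

```python
import random

lines = [("Welcome to the the jungle", "I can stay here all night")]  # from data/corpus.txt

def random_sent(index):
    t1, t2 = lines[index]
    if random.random() > 0.5:
        return t1, t2, 1                        # 1 = IsNext: keep the true pair
    return t1, random.choice(lines)[1], 0       # 0 = NotNext: second sentence drawn at random

print(random_sent(0))
```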
From c5cca985ade848941b438e935eb67b4d6edbf5cc Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Mon, 6 Feb 2023 20:14:13 +0800
Subject: [PATCH 19/25] Understand BERTDataset's magic method
 __getitem__(self, item) through comments

---
 bert_pytorch/dataset/dataset.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/bert_pytorch/dataset/dataset.py b/bert_pytorch/dataset/dataset.py
index 1de3624..080dd51 100644
--- a/bert_pytorch/dataset/dataset.py
+++ b/bert_pytorch/dataset/dataset.py
@@ -21,7 +21,7 @@ def __init__(self, corpus_path, vocab, seq_len, encoding="utf-8", corpus_lines=N
                     self.corpus_lines += 1

             if on_memory:
-                # load the entire dataset into memory
+                # load the entire dataset into memory; the corpus is parsed into the list-typed attribute self.lines
                 self.lines = [line[:-1].split('\t')
                               for line in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines)]  # split each corpus line into 2 sentences at the \t character
                 self.corpus_lines = len(self.lines)  # record the number of corpus lines
@@ -37,13 +37,14 @@ def __len__(self):
         return self.corpus_lines

     def __getitem__(self, item):
-        # Apply masking and padding to the sentence pair randomly selected by index item
-        t1, t2, is_next_label = self.random_sent(item)
-        t1_random, t1_label = self.random_word(t1)
-        t2_random, t2_label = self.random_word(t2)
+        # Definition of the magic method __getitem__, which lets instances of the class be indexed by item like a list
+        # Every sample returned by a BERTDataset instance goes through both the Next Sentence operation and the Masked LM operation
+        t1, t2, is_next_label = self.random_sent(item)  # Next Sentence operation
+        t1_random, t1_label = self.random_word(t1)  # Masked LM operation; t1_label holds the masked class index for each position of t1 -- see the initialization of the Vocab class in vocab.py
+        t2_random, t2_label = self.random_word(t2)

         # [CLS] tag = SOS tag, [SEP] tag = EOS tag
-        t1 = [self.vocab.sos_index] + t1_random + [self.vocab.eos_index]
+        t1 = [self.vocab.sos_index] + t1_random + [self.vocab.eos_index]  # Figure 2 in the paper
         t2 = t2_random + [self.vocab.eos_index]

         t1_label = [self.vocab.pad_index] + t1_label + [self.vocab.pad_index]
@@ -53,7 +54,7 @@ def __getitem__(self, item):
         bert_input = (t1 + t2)[:self.seq_len]
         bert_label = (t1_label + t2_label)[:self.seq_len]

-        padding = [self.vocab.pad_index for _ in range(self.seq_len - len(bert_input))]
+        padding = [self.vocab.pad_index for _ in range(self.seq_len - len(bert_input))]  # the gap between the maximum and the actual length is the number of positions to pad
         bert_input.extend(padding), bert_label.extend(padding), segment_label.extend(padding)

         output = {"bert_input": bert_input,
@@ -95,14 +96,13 @@ def random_word(self, sentence):
         return tokens, output_label

     def random_sent(self, index):
-        t1, t2 = self.get_corpus_line(index)
-
-        # output_text, label(isNotNext:0, isNext:1)
-        # with 50% probability return the original (sentence, next_sentence) pair, otherwise sample next_sentence at random
+        t1, t2 = self.get_corpus_line(index)
+        # for sentence A and B, 50% of the time B is the actual next sentence that follows A (labeled as IsNext)
+        # and for 50% of the time it is a random sentence from the corpus (labeled as NotNext)
         if random.random() > 0.5:
-            return t1, t2, 1
+            return t1, t2, 1  # 1 means IsNext
         else:
-            return t1, self.get_random_line(), 0
+            return t1, self.get_random_line(), 0  # 0 means NotNext

     def get_corpus_line(self, item):
         if self.on_memory:

From 9c7a2fd934c7f427cea522c570e12a4525c8ee17 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Fri, 10 Feb 2023 23:57:00 +0800
Subject: [PATCH 20/25] Revise the comments in bert_pytorch/dataset/dataset.py

---
 bert_pytorch/dataset/dataset.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/bert_pytorch/dataset/dataset.py b/bert_pytorch/dataset/dataset.py
index 080dd51..7a28577 100644
--- a/bert_pytorch/dataset/dataset.py
+++ b/bert_pytorch/dataset/dataset.py
@@ -65,8 +65,9 @@ def __getitem__(self, item):
         return {key: torch.tensor(value) for key, value in output.items()}

     def random_word(self, sentence):
+        # convert the sentence into the indices its tokens map to in the token-index dictionary
         tokens = sentence.split()
-        output_label = []  # positions that are actually masked are filled with mask_index (=4), randomly replaced ones with a random index, and the rest with the token's index in the vocabulary self.vocab.stoi
+        output_label = []  # this list holds only 0 and non-zero values: 0 means the token at that position belongs to the 85% left untouched; a non-zero value is the index the token at that position had in the vocab before masking

         for i, token in enumerate(tokens):
             prob = random.random()
@@ -83,14 +84,15 @@ def random_word(self, sentence):
                 elif prob < 0.9:
                     tokens[i] = random.randrange(len(self.vocab))

-                # 10% randomly change token to current token
+                # 10% doesn't change current token
                 else:
                     tokens[i] = self.vocab.stoi.get(token, self.vocab.unk_index)

                 output_label.append(self.vocab.stoi.get(token, self.vocab.unk_index))

             else:
-                tokens[i] = self.vocab.stoi.get(token, self.vocab.unk_index)
+                tokens[i] = self.vocab.stoi.get(token, self.vocab.unk_index)  # words that were not masked are filled with their true index in the vocab
+                # concretely, self.vocab.unk_index = 1, and the line above amounts to a lookup in the stoi token-index dictionary
                 output_label.append(0)

         return tokens, output_label
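Patches 19 and 20 together describe the 15% selection and the 80/10/10 split inside random_word. A self-contained sketch of that rule (toy stoi dictionary; mask_index=4 and unk_index=1 as the comments above state):

```python
import random

stoi = {'<pad>': 0, '<unk>': 1, '<eos>': 2, '<sos>': 3, '<mask>': 4,
        'welcome': 5, 'to': 6, 'the': 7, 'jungle': 8}

def random_word(sentence, vocab_size=len(stoi), mask_index=4, unk_index=1):
    tokens = sentence.split()
    output_label = []
    for i, token in enumerate(tokens):
        prob = random.random()
        if prob < 0.15:                      # 15% of tokens are selected
            prob /= 0.15                     # re-randomize within the selected 15%
            if prob < 0.8:
                tokens[i] = mask_index       # 80%: replace with <mask>
            elif prob < 0.9:
                tokens[i] = random.randrange(vocab_size)  # 10%: random token
            else:
                tokens[i] = stoi.get(token, unk_index)    # 10%: keep the token
            output_label.append(stoi.get(token, unk_index))  # remember the original index
        else:
            tokens[i] = stoi.get(token, unk_index)
            output_label.append(0)           # 0 = position not selected for masking
    return tokens, output_label

print(random_word("welcome to the jungle"))
```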
From 33cf7186e741219c25fe0805a9fd2fb3581420a7 Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sat, 11 Feb 2023 00:26:20 +0800
Subject: [PATCH 21/25] Add comments to bert_pytorch/trainer/pretrain.py

---
 bert_pytorch/trainer/pretrain.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/bert_pytorch/trainer/pretrain.py b/bert_pytorch/trainer/pretrain.py
index 0b882dd..5072099 100644
--- a/bert_pytorch/trainer/pretrain.py
+++ b/bert_pytorch/trainer/pretrain.py
@@ -43,7 +43,8 @@ def __init__(self, bert: BERT, vocab_size: int,
         # This BERT model will be saved every epoch
         self.bert = bert
         # Initialize the BERT Language Model, with BERT model
-        self.model = BERTLM(bert, vocab_size).to(self.device)
+        self.model = BERTLM(bert, vocab_size).to(self.device)
+        # On top of the encoding the BERT class produces for the input, the BERTLM class returns the predictions for Masked LM and Next Sentence Prediction

         # Distributed GPU training if CUDA can detect more than 1 GPU
         if with_cuda and torch.cuda.device_count() > 1:
@@ -100,12 +101,14 @@ def iteration(self, epoch, data_loader, train=True):

             # 1. forward the next_sentence_prediction and masked_lm model
             next_sent_output, mask_lm_output = self.model.forward(data["bert_input"], data["segment_label"])
-
+            # the token masks in data["bert_input"] and the is-next marks for data["segment_label"] are labels that come from the sentences themselves, so this is unsupervised learning
+            # data["bert_input"] and data["segment_label"] are the samples for the LM-mask and NSP tasks
+            # data["bert_label"] and data['is_next'] are the labels
             # 2-1. NLL(negative log likelihood) loss of is_next classification result
             next_loss = self.criterion(next_sent_output, data["is_next"])

             # 2-2. NLLLoss of predicting masked token word
-            mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])
+            mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])  # BERT's pre-training objectives are in essence classification problems, hence NLLLoss()

             # 2-3. Adding next_loss and mask_loss : 3.4 Pre-training Procedure
             loss = next_loss + mask_loss
@@ -118,9 +121,9 @@ def iteration(self, epoch, data_loader, train=True):

             # next sentence prediction accuracy
             correct = next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
-            avg_loss += loss.item()
-            total_correct += correct
-            total_element += data["is_next"].nelement()
+            avg_loss += loss.item()  # accumulate the loss of each iteration, used to compute the average loss
+            total_correct += correct  # accumulate the number of correct next-sentence predictions
+            total_element += data["is_next"].nelement()  # number of elements in the tensor, equal to the product of its dimensions

             post_fix = {
                 "epoch": epoch,
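A shape-level sketch of how the two NLL losses above combine (random tensors stand in for the model outputs; the patches do not show how self.criterion is constructed, so a plain NLLLoss is assumed here):

```python
import torch
import torch.nn as nn

batch, seq_len, vocab_size = 2, 20, 30
criterion = nn.NLLLoss()

next_sent_output = torch.randn(batch, 2).log_softmax(-1)                  # (batch, 2)
mask_lm_output = torch.randn(batch, seq_len, vocab_size).log_softmax(-1)  # (batch, seq_len, vocab)

is_next = torch.tensor([0, 1])                               # NotNext / IsNext labels
bert_label = torch.randint(5, vocab_size, (batch, seq_len))  # original indices of masked tokens

next_loss = criterion(next_sent_output, is_next)
# NLLLoss expects class scores on dim 1, hence the transpose to (batch, vocab, seq_len)
mask_loss = criterion(mask_lm_output.transpose(1, 2), bert_label)
loss = next_loss + mask_loss
print(loss.item())
```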
From e9ffb2ffe9cbbbc2c18f1d610adb901c97a87fbe Mon Sep 17 00:00:00 2001
From: wanghesong2019
Date: Sat, 11 Feb 2023 07:57:45 +0800
Subject: [PATCH 22/25] Add comments to NextSentencePrediction and
 MaskedLanguageModel in bert_pytorch/model/language_model.py

---
 bert_pytorch/model/language_model.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/bert_pytorch/model/language_model.py b/bert_pytorch/model/language_model.py
index 608f42a..7f66d2f 100644
--- a/bert_pytorch/model/language_model.py
+++ b/bert_pytorch/model/language_model.py
@@ -39,7 +39,9 @@ def __init__(self, hidden):
         self.softmax = nn.LogSoftmax(dim=-1)

     def forward(self, x):
-        return self.softmax(self.linear(x[:, 0]))
+        # the final hidden state of [CLS] is used as the sequence representation for classification tasks,
+        # where x[:, 0] represents [CLS]
+        return self.softmax(self.linear(x[:, 0]))


 class MaskedLanguageModel(nn.Module):
@@ -54,8 +56,8 @@ def __init__(self, hidden, vocab_size):
         :param vocab_size: total vocab size
         """
         super().__init__()
-        self.linear = nn.Linear(hidden, vocab_size)
+        self.linear = nn.Linear(hidden, vocab_size)  # Linear(hidden, vocab_size) maps the hidden-dimensional input to vocab_size dimensions
         self.softmax = nn.LogSoftmax(dim=-1)

     def forward(self, x):
-        return self.softmax(self.linear(x))
+        return self.softmax(self.linear(x))  # x's last dimension is hidden; after self.linear() it becomes vocab_size
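A toy sketch of the NextSentencePrediction head commented in this patch (hidden=8 for brevity; not part of the patch): slicing x[:, 0] picks the [CLS] position out of the encoder output before the 2-way LogSoftmax classifier.

```python
import torch
import torch.nn as nn

hidden = 8
linear = nn.Linear(hidden, 2)            # 2 classes: NotNext / IsNext
softmax = nn.LogSoftmax(dim=-1)

x = torch.randn(3, 20, hidden)           # (batch, seq_len, hidden) encoder output
cls_state = x[:, 0]                      # (batch, hidden): first token of each sequence
print(softmax(linear(cls_state)).shape)  # torch.Size([3, 2])
```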
From 7b55204625a888a0da76c8b30c4ddc1fdfa016a3 Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Sun, 12 Feb 2023 02:10:49 +0800
Subject: [PATCH 23/25] Update README.md

---
 README.md | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 07159b2..65ea0e1 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,13 @@
-# BERT-pytorch source-reading notes and module-by-module debugging walkthrough
-Source code: Google AI 2018 BERT pytorch implementation\
-Debugger: wanghesong2019
-# Project background
-    The code here is genuinely beautiful and clearly structured, but the original repository only gives two short command lines; just running them made it hard for me to see clearly how the BERT model works internally, so I resolved to chew through this project's code line by line;
-    reading notes are added to the code as comments; the debugging process gets its own separate .py file, for my own reference and the reader's;
-# Overall framework
-![BERT-pytorch framework](https://raw.githubusercontent.com/wanghesong2019/BERT-pytorch/master/img/1.PNG)
-In the bertEmbedding-loss-train pattern above, bertEmbedding is undoubtedly the most important piece, because it not only contains BERT's 3 kinds of embedding but also implements BERT's 2 pre-training objectives. Its structure is as follows:\
-![BERT-pytorch framework](https://raw.githubusercontent.com/wanghesong2019/BERT-pytorch/master/img/2.PNG)
+# BERT-pytorch learning takeaways
+At 2 a.m. in mid-February 2023 I am wrapping up my study of the BERT-pytorch project. This is the first open-source project I have studied relatively carefully and systematically since registering a GitHub account; it began on the eve of the winter holiday, continued until now, and I stuck with it. Before leaving, a couple of parting words, for the record!
+## 1. Experience
+- Working through the code alongside the BERT paper, I basically grasped what BERT really is: the dataset module with vocabulary construction, random token replacement and random sentence-pair sampling; the modeling module with its transformer-encoder architecture; and the trainer module with its loss computation and gradient descent;
+- While studying the code, I picked up basic git operations, GitHub usage habits and common PyTorch API usage;
+- An open-source project is best studied alongside its paper, tying theory and practice together; better still, feed data in and get it running
+
+## 2. Lessons
+-- I read the code line by line and set up the bert-pytorch environment, but never ran it on data to inspect the results, so my parameter-tuning experience did not grow
+-- The study had no milestone schedule and dragged on
+-- For future open-source projects, be sure to bring in data and run them
+-- I meant to write a proper readme, but in the end I lost steam.
+

From 38a931cfc5f660b0ddf5ea331066a4da9389a3e4 Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Sun, 12 Feb 2023 02:12:09 +0800
Subject: [PATCH 24/25] Update README.md

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 65ea0e1..41a812e 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,12 @@
 At 2 a.m. in mid-February 2023 I am wrapping up my study of the BERT-pytorch project. This is the first open-source project I have studied relatively carefully and systematically since registering a GitHub account; it began on the eve of the winter holiday, continued until now, and I stuck with it. Before leaving, a couple of parting words, for the record!
 ## 1. Experience
 - Working through the code alongside the BERT paper, I basically grasped what BERT really is: the dataset module with vocabulary construction, random token replacement and random sentence-pair sampling; the modeling module with its transformer-encoder architecture; and the trainer module with its loss computation and gradient descent;
-- While studying the code, I picked up basic git operations, GitHub usage habits and common PyTorch API usage;
+- While studying the code, I picked up basic git operations, GitHub usage habits (my annotations were all merged into the master branch) and common PyTorch API usage;
 - An open-source project is best studied alongside its paper, tying theory and practice together; better still, feed data in and get it running

 ## 2. Lessons
--- I read the code line by line and set up the bert-pytorch environment, but never ran it on data to inspect the results, so my parameter-tuning experience did not grow
--- The study had no milestone schedule and dragged on
--- For future open-source projects, be sure to bring in data and run them
--- I meant to write a proper readme, but in the end I lost steam.
+- I read the code line by line and set up the bert-pytorch environment, but never ran it on data to inspect the results, so my parameter-tuning experience did not grow
+- The study had no milestone schedule and dragged on
+- For future open-source projects, be sure to bring in data and run them
+- I meant to write a proper readme, but in the end I lost steam.

From cdf357c80212090b8332d737f5a6fa1f388314a7 Mon Sep 17 00:00:00 2001
From: wanghesong2019 <51843210+wanghesong2019@users.noreply.github.com>
Date: Mon, 13 Feb 2023 23:03:12 +0800
Subject: [PATCH 25/25] Update README.md

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 41a812e..5772319 100644
--- a/README.md
+++ b/README.md
@@ -11,3 +11,10 @@
 - For future open-source projects, be sure to bring in data and run them
 - I meant to write a proper readme, but in the end I lost steam.

+---
+# Notes on understanding bert
+
+## 20230213: Today I discussed BERT's embedding part with Minghao, a colleague at Dahua: how is the initialized embedding vector obtained from a token? He held that the initialized embedding vectors take part in training and learning. But looking at this project again tonight, I found that its embedding module only performs the random initialization of tokens; after that, everything enters the attention module, where the inputs are first linearly projected into query, key and value and the attention computation begins. One can therefore regard the embedding module as just part of data preprocessing, which does not take part in training.
+
+Another question is why the embedding can be randomly initialized at all. I think it is mainly because a token's index is itself arbitrary (first come, first served); that is, for both the token's index and its initial embedding vector, it suffices to fix the key-value relationship, which carries no semantic information. This also squares with the first point: if the initialized embeddings were model parameters participating in training, that would break the determinism of the key-value relationship.
+