@@ -21,7 +21,7 @@ def test_download(self):
             print(embed(words).size())

     def test_gpt2_embedding(self):
-        weight_path = 'test/data_for_tests/embedding/small_gpt2'
+        weight_path = 'tests/data_for_tests/embedding/small_gpt2'
         vocab = Vocabulary().add_word_lst("this is a texta sentence".split())
         embed = GPT2Embedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1)
         requires_grad = embed.requires_grad
@@ -49,7 +49,7 @@ def test_gpt2_embedding(self):
     def test_gpt2_ebembedding_2(self):
         # Test whether only_use_pretrain_vocab and truncate_embed work correctly
         Embedding = GPT2Embedding
-        weight_path = 'test/data_for_tests/embedding/small_gpt2'
+        weight_path = 'tests/data_for_tests/embedding/small_gpt2'
         vocab = Vocabulary().add_word_lst("this is a texta and".split())
         embed1 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)),
                            only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1)
@@ -89,13 +89,13 @@ def test_gpt2_ebembedding_2(self):
     def test_gpt2_tokenizer(self):
         from fastNLP.modules.tokenizer import GPT2Tokenizer

-        tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2')
+        tokenizer = GPT2Tokenizer.from_pretrained('tests/data_for_tests/embedding/small_gpt2')
         print(tokenizer.encode("this is a texta a sentence"))
         print(tokenizer.encode('this is'))

     def test_gpt2_embed_eq_gpt2_piece_encoder(self):
         # Mainly check that the embedding results are consistent with the word piece encoder results
-        weight_path = 'test/data_for_tests/embedding/small_gpt2'
+        weight_path = 'tests/data_for_tests/embedding/small_gpt2'
         ds = DataSet({'words': ["this is a texta a sentence".split(), 'this is'.split()]})
         encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path)
         encoder.eval()
@@ -187,7 +187,7 @@ def test_generate_small_gpt2(self):

         print(used_pairs)
         import json
-        with open('test/data_for_tests/embedding/small_gpt2/vocab.json', 'w') as f:
+        with open('tests/data_for_tests/embedding/small_gpt2/vocab.json', 'w') as f:
             new_used_vocab = {}
             for idx, key in enumerate(used_vocab.keys()):
                 new_used_vocab[key] = len(new_used_vocab)
@@ -201,12 +201,12 @@ def test_generate_small_gpt2(self):

             json.dump(new_used_vocab, f)

-        with open('test/data_for_tests/embedding/small_gpt2/merges.txt', 'w') as f:
+        with open('tests/data_for_tests/embedding/small_gpt2/merges.txt', 'w') as f:
             f.write('#version: small\n')
             for k, v in sorted(sorted(used_pairs.items(), key=lambda kv: kv[1])):
                 f.write('{} {}\n'.format(k[0], k[1]))

-        new_tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2')
+        new_tokenizer = GPT2Tokenizer.from_pretrained('tests/data_for_tests/embedding/small_gpt2')
         new_all_tokens = []
         for sent in [sent1, sent2, sent3]:
             tokens = new_tokenizer.tokenize(sent, add_prefix_space=True)
@@ -227,21 +227,21 @@ def test_generate_small_gpt2(self):
             "n_positions": 20,
             "vocab_size": len(new_used_vocab)
         }
-        with open('test/data_for_tests/embedding/small_gpt2/config.json', 'w') as f:
+        with open('tests/data_for_tests/embedding/small_gpt2/config.json', 'w') as f:
             json.dump(config, f)

         # Generate a smaller merges.txt and vocab.json by recording the values seen in the tokenizer
         from fastNLP.modules.encoder.gpt2 import GPT2LMHeadModel, GPT2Config

-        config = GPT2Config.from_pretrained('test/data_for_tests/embedding/small_gpt2')
+        config = GPT2Config.from_pretrained('tests/data_for_tests/embedding/small_gpt2')

         model = GPT2LMHeadModel(config)
-        torch.save(model.state_dict(), 'test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin')
+        torch.save(model.state_dict(), 'tests/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin')
         print(model(torch.LongTensor([[0, 1, 2, 3]])))

     def test_gpt2_word_piece_encoder(self):
         # Mainly check that it runs
-        weight_path = 'test/data_for_tests/embedding/small_gpt2'
+        weight_path = 'tests/data_for_tests/embedding/small_gpt2'
         ds = DataSet({'words': ["this is a test sentence".split()]})
         embed = GPT2WordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1)
         embed.index_datasets(ds, field_name='words')
@@ -256,7 +256,7 @@ def test_gpt2_word_piece_encoder(self):

     @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis")
     def test_generate(self):
-        # weight_path = 'test/data_for_tests/embedding/small_gpt2'
+        # weight_path = 'tests/data_for_tests/embedding/small_gpt2'
         weight_path = 'en'

         encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path, language_model=True)