resolve hard-coded path issue & improve README

Yue Wang · Yue Wang · commit 32291247a75d · 2021-10-05T17:31:59.000+08:00
diff --git a/README.md b/README.md
@@ -91,14 +91,19 @@ We encourage users of this software to tell us about the applications in which t
  
 ## Download 
 * [Pre-trained checkpoints & Fine-tuning data](https://console.cloud.google.com/storage/browser/sfr-codet5-data-research)
+* Fine-tuned checkpoints (TBA)
+* Extra C/C# pre-training data (TBA)
 
 Instructions to download:
 ```
 pip install gsutil
 
+gsutil -m cp -r "gs://sfr-codet5-data-research/data/" .
+
+mkdir pretrained_models; cd pretrained_models
 gsutil -m cp -r \
-  "gs://sfr-codet5-data-research/data/" \
-  "gs://sfr-codet5-data-research/pretrained_models/" \
+  "gs://sfr-codet5-data-research/pretrained_models/codet5_small" \
+  "gs://sfr-codet5-data-research/pretrained_models/codet5_base" \
   .
 ```
 
diff --git a/evaluator/CodeBLEU/bleu.py b/evaluator/CodeBLEU/bleu.py
@@ -16,7 +16,6 @@
 from collections import Counter
 
 from evaluator.CodeBLEU.utils import ngrams
-import pdb
 
 
 def sentence_bleu(
diff --git a/evaluator/CodeBLEU/calc_code_bleu.py b/evaluator/CodeBLEU/calc_code_bleu.py
@@ -1,12 +1,11 @@
 # Copyright (c) Microsoft Corporation. 
 # Licensed under the MIT license.
+# https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/code-to-code-trans/evaluator/CodeBLEU
 
 # -*- coding:utf-8 -*-
 import argparse
+import os
 from evaluator.CodeBLEU import bleu, weighted_ngram_match, syntax_match, dataflow_match
-# import evaluator.CodeBLEU.weighted_ngram_match
-# import evaluator.CodeBLEU.syntax_match
-# import evaluator.CodeBLEU.dataflow_match
 
 
 def get_codebleu(refs, hyp, lang, params='0.25,0.25,0.25,0.25'):
@@ -36,7 +35,8 @@ def get_codebleu(refs, hyp, lang, params='0.25,0.25,0.25,0.25'):
     ngram_match_score = bleu.corpus_bleu(tokenized_refs, tokenized_hyps)
 
     # calculate weighted ngram match
-    keywords = [x.strip() for x in open('/export/share/wang.y/workspace/CodeT5Full/finetune/evaluator/CodeBLEU/keywords/' + lang + '.txt', 'r', encoding='utf-8').readlines()]
+    root_dir = os.path.dirname(__file__)
+    keywords = [x.strip() for x in open(root_dir + '/keywords/' + lang + '.txt', 'r', encoding='utf-8').readlines()]
 
     def make_weights(reference_tokens, key_word_list):
         return {token: 1 if token in key_word_list else 0.2 for token in reference_tokens}
@@ -78,3 +78,4 @@ def make_weights(reference_tokens, key_word_list):
     args = parser.parse_args()
     code_bleu_score = get_codebleu(args.refs, args.hyp, args.lang, args.params)
     print('CodeBLEU score: ', code_bleu_score)
+
diff --git a/evaluator/CodeBLEU/dataflow_match.py b/evaluator/CodeBLEU/dataflow_match.py
@@ -7,9 +7,10 @@
                                        index_to_code_token,
                                        tree_to_variable_index)
 from tree_sitter import Language, Parser
-import pdb
+import os
+
+root_dir = os.path.dirname(__file__)
 
-parser_path = '/export/share/wang.y/workspace/CodeT5Full/finetune/evaluator/CodeBLEU/parser'
 dfg_function = {
     'python': DFG_python,
     'java': DFG_java,
@@ -26,7 +27,7 @@ def calc_dataflow_match(references, candidate, lang):
 
 
 def corpus_dataflow_match(references, candidates, lang):
-    LANGUAGE = Language('{}/my-languages.so'.format(parser_path), lang)
+    LANGUAGE = Language(root_dir + '/parser/my-languages.so', lang)
     parser = Parser()
     parser.set_language(LANGUAGE)
     parser = [parser, dfg_function[lang]]
diff --git a/evaluator/CodeBLEU/syntax_match.py b/evaluator/CodeBLEU/syntax_match.py
@@ -7,8 +7,9 @@
                                        index_to_code_token,
                                        tree_to_variable_index)
 from tree_sitter import Language, Parser
+import os
 
-parser_path = '/export/share/wang.y/workspace/CodeT5Full/finetune/evaluator/CodeBLEU/parser'
+root_dir = os.path.dirname(__file__)
 dfg_function = {
     'python': DFG_python,
     'java': DFG_java,
@@ -25,7 +26,7 @@ def calc_syntax_match(references, candidate, lang):
 
 
 def corpus_syntax_match(references, candidates, lang):
-    JAVA_LANGUAGE = Language('{}/my-languages.so'.format(parser_path), lang)
+    JAVA_LANGUAGE = Language(root_dir + '/parser/my-languages.so', lang)
     parser = Parser()
     parser.set_language(JAVA_LANGUAGE)
     match_count = 0
diff --git a/run_gen.py b/run_gen.py
@@ -28,8 +28,6 @@
 from tqdm import tqdm
 import multiprocessing
 import time
-import sys
-import pdb
 
 from torch.utils.tensorboard import SummaryWriter
 from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
@@ -112,7 +110,7 @@ def eval_bleu_epoch(args, eval_data, eval_examples, model, tokenizer, split_tag,
                                        max_length=args.max_target_length)
                 top_preds = list(preds.cpu().numpy())
             pred_ids.extend(top_preds)
-    # pdb.set_trace()
+
     pred_nls = [tokenizer.decode(id, skip_special_tokens=True, clean_up_tokenization_spaces=False) for id in pred_ids]
 
     output_fn = os.path.join(args.res_dir, "test_{}.output".format(criteria))
@@ -146,20 +144,17 @@ def eval_bleu_epoch(args, eval_data, eval_examples, model, tokenizer, split_tag,
                     f1.write(gold.target.strip() + '\n')
                     f2.write(gold.source.strip() + '\n')
 
-        if args.task in ['summarize']:
+        if args.task == 'summarize':
             (goldMap, predictionMap) = smooth_bleu.computeMaps(predictions, gold_fn)
             bleu = round(smooth_bleu.bleuFromMaps(goldMap, predictionMap)[0], 2)
         else:
             bleu = round(_bleu(gold_fn, output_fn), 2)
-            if split_tag == 'test' and args.task in ['refine', 'translate', 'concode']:
+            if args.task == 'concode':
                 codebleu = calc_code_bleu.get_codebleu(gold_fn, output_fn, args.lang)
-        # except:
-        #     bleu = 0.0
-        #     codebleu = 0.0
 
         em = np.mean(dev_accs) * 100
         result = {'em': em, 'bleu': bleu}
-        if not args.task == 'summarize' and split_tag == 'test':
+        if args.task == 'concode':
             result['codebleu'] = codebleu * 100
 
     logger.info("***** Eval results *****")
@@ -364,7 +359,7 @@ def main():
         logger.info("  " + "***** Testing *****")
         logger.info("  Batch size = %d", args.eval_batch_size)
 
-        for criteria in ['best-bleu', 'best-ppl']:  # 'best-bleu', 'best-ppl', 'last'
+        for criteria in ['best-bleu', 'best-ppl']:
             file = os.path.join(args.output_dir, 'checkpoint-{}/pytorch_model.bin'.format(criteria))
             logger.info("Reload model from {}".format(file))
             model.load_state_dict(torch.load(file))
@@ -386,5 +381,4 @@ def main():
 
 
 if __name__ == "__main__":
-    # print(' '.join(sys.argv[:]))
     main()