From 567d0d7d0b508acec8a0cdcfdaadb2ec2a5c0af5 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Mon, 12 Aug 2024 23:45:56 +0000
Subject: [PATCH 01/15] Changed mbpp.py

---
 bigcode_eval/tasks/mbpp.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index ccaf1b596..155b789d4 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -52,7 +52,7 @@ def get_prompt(self, doc):
         """
         description = doc["text"]
         test_example = doc["test_list"][0]
-        prompt = f'"""\n{description}\n{test_example}\n"""\n'
+        prompt = f'"""\n{description}\n{test_example}\nPseudocode:"""\n'
         return prompt
 
     def get_reference(self, doc):
@@ -68,7 +68,9 @@ def postprocess_generation(self, generation, idx):
             index of doc in the dataset to which the generation belongs
         """
         prompt = self.get_prompt(self.dataset["test"][idx])
-        generation = generation[len(prompt) :]
+        start_idx = generation.find("Code:")
+        generation = generation[start_idx + len("Code:"):]
+        print(prompt + self._stop_at_stop_token(generation, self.stop_words))
         return prompt + self._stop_at_stop_token(generation, self.stop_words)
 
     def process_results(self, generations, references):

From 4f63dee5447ed73e18ca1253c3cdc68da8770485 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Tue, 13 Aug 2024 11:11:34 +0100
Subject: [PATCH 02/15] Update mbpp.py

making it more aligned with the training style
---
 bigcode_eval/tasks/mbpp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index 155b789d4..11a2b061e 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -52,7 +52,7 @@ def get_prompt(self, doc):
         """
         description = doc["text"]
         test_example = doc["test_list"][0]
-        prompt = f'"""\n{description}\n{test_example}\nPseudocode:"""\n'
+        prompt = f'"""\n{description}\n{test_example}\nPseudocode:\n"""'
         return prompt
 
     def get_reference(self, doc):
@@ -70,7 +70,7 @@ def postprocess_generation(self, generation, idx):
         prompt = self.get_prompt(self.dataset["test"][idx])
         start_idx = generation.find("Code:")
         generation = generation[start_idx + len("Code:"):]
-        print(prompt + self._stop_at_stop_token(generation, self.stop_words))
+        print(generation)
         return prompt + self._stop_at_stop_token(generation, self.stop_words)
 
     def process_results(self, generations, references):

From 2b9705d4e49946c91048a0d124211533bf6fd02f Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Tue, 13 Aug 2024 13:51:58 +0100
Subject: [PATCH 03/15] Update mbpp.py

updated MBPP with a task description
---
 bigcode_eval/tasks/mbpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index 11a2b061e..90f50be87 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -52,7 +52,7 @@ def get_prompt(self, doc):
         """
         description = doc["text"]
         test_example = doc["test_list"][0]
-        prompt = f'"""\n{description}\n{test_example}\nPseudocode:\n"""'
+        prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
         return prompt
 
     def get_reference(self, doc):
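For reference, this is the prompt string that get_prompt builds as of PATCH 03, shown as a minimal standalone sketch; the MBPP record below is hypothetical, with only the two fields the code reads filled in.

# Sketch of the PATCH 03 prompt format; doc values are illustrative only.
doc = {
    "text": "Write a function to add two numbers.",
    "test_list": ["assert add(1, 2) == 3"],
}
description = doc["text"]
test_example = doc["test_list"][0]
prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
print(prompt)

The model is expected to continue this docstring with pseudocode, which the postprocessing from PATCH 01 then scans for a "Code:" marker.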
From 023e6288a1b477149a4c33268b5887b1ec83a42c Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Tue, 13 Aug 2024 16:12:04 +0100
Subject: [PATCH 04/15] Update mbpp.py

integrated multistep code generation
---
 bigcode_eval/tasks/mbpp.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index 90f50be87..772e204a5 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -12,6 +12,9 @@
 from bigcode_eval.base import Task
 from bigcode_eval.tasks.custom_metrics.code_eval import compute_code_eval
 
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+
 
 _CITATION = """
 @article{austin2021program,
@@ -35,6 +38,10 @@ def __init__(self):
             stop_words=["\nclass", "\nassert", '\n"""', "\nprint", "\nif", "\n<|/", "\n```"],
             requires_execution=True,
         )
+        checkpoint = "/mnt/roma/abhineet/output_dir_p/checkpoint-5640"
+        self.model = AutoModelForCausalLM.from_pretrained(checkpoint)
+        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
@@ -45,14 +52,31 @@ def get_dataset(self):
         ), "please ensure you have the latest version of MBPP dataset, try deleting its old cache"
         return dataset
 
+    def generate_prompt(self, doc):
+        description = doc["text"]
+        test_example = doc["test_list"][0]
+        prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
+        self.model.to(self.device)
+        inputs = self.tokenizer.encode(prompt, return_tensors = "pt")
+        outputs = self.model.generate(inputs, max_new_tokens = 300)
+        output = tokenizer.decode(outputs[0])
+        start_idx = output.find("Code:")
+        return output[:start_idx]
+
+
     def get_prompt(self, doc):
         """Builds the prompt for the LM to generate from.
         MBPP prompt is built following to InCoder (Fried et al.) approach
         prompt = docstring that includes one test
         """
-        description = doc["text"]
-        test_example = doc["test_list"][0]
-        prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
+        # description = doc["text"]
+        # test_example = doc["test_list"][0]
+        # prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
+        prompt_with_pseudocode = self.generate_prompt(doc)
+        prompt = prompt_with_pseudocode + "\n\n\nCode:\n\n"
+
+
         return prompt
 
     def get_reference(self, doc):

From 83c58863ae163f3a5fa225822959078f2b025205 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Tue, 13 Aug 2024 16:14:46 +0100
Subject: [PATCH 05/15] device correction

---
 bigcode_eval/tasks/mbpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index 772e204a5..de1130c6e 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -57,7 +57,7 @@ def generate_prompt(self, doc):
         test_example = doc["test_list"][0]
         prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
         self.model.to(self.device)
-        inputs = self.tokenizer.encode(prompt, return_tensors = "pt")
+        inputs = self.tokenizer.encode(prompt, return_tensors = "pt").to(self.device)
         outputs = self.model.generate(inputs, max_new_tokens = 300)
         output = tokenizer.decode(outputs[0])
         start_idx = output.find("Code:")

From ede5cb45e6dbf87e3d0477f08494d567ce935645 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Tue, 13 Aug 2024 16:16:28 +0100
Subject: [PATCH 06/15] tokenizer correction

---
 bigcode_eval/tasks/mbpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index de1130c6e..cb403735f 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -59,7 +59,7 @@ def generate_prompt(self, doc):
         self.model.to(self.device)
         inputs = self.tokenizer.encode(prompt, return_tensors = "pt").to(self.device)
         outputs = self.model.generate(inputs, max_new_tokens = 300)
-        output = tokenizer.decode(outputs[0])
+        output = self.tokenizer.decode(outputs[0])
         start_idx = output.find("Code:")
         return output[:start_idx]
 

From ada5b483aa9bc9a65341cbd17d7cf25a446c6396 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 11:09:39 +0100
Subject: [PATCH 07/15] Update base.py

---
 bigcode_eval/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bigcode_eval/base.py b/bigcode_eval/base.py
index 9468b98cb..a66b2ff56 100644
--- a/bigcode_eval/base.py
+++ b/bigcode_eval/base.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from warnings import warn
 
-from datasets import load_dataset
+from datasets import load_dataset, load_from_disk
 
 
 class Task(ABC):
@@ -25,7 +25,7 @@ def __init__(self, stop_words=None, requires_execution=True):
         self.stop_words = stop_words
         self.requires_execution = requires_execution
         try:
-            self.dataset = load_dataset(path=self.DATASET_PATH, name=self.DATASET_NAME)
+            self.dataset = load_from_disk(path=self.DATASET_PATH)
         except Exception as e:
             warn(
                 f"Loading the dataset failed with {str(e)}. This task will use a locally downloaded dataset, not from the HF hub. \
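Taken together, PATCHES 04-06 implement a two-step flow: a fine-tuned model first drafts pseudocode for the task, and everything up to its "Code:" marker becomes the prompt for the evaluated model. A standalone sketch of that flow, with the checkpoint path replaced by a placeholder and assuming the fine-tuned model reliably emits a "Code:" marker:

# Two-step prompt construction as wired up in PATCHES 04-06.
# "path/to/pseudocode-checkpoint" is a placeholder, not the author's path.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "path/to/pseudocode-checkpoint"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def prompt_with_pseudocode(description, test_example):
    prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    outputs = model.generate(inputs, max_new_tokens=300)
    output = tokenizer.decode(outputs[0])
    # Truncate at the first "Code:" marker; if the model never emits one,
    # find() returns -1 and this silently drops the last character -- the
    # same edge case the patched generate_prompt has.
    start_idx = output.find("Code:")
    return output[:start_idx] + "\n\n\nCode:\n\n"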
\

From dd7f911e2cabeb68ed9fd4d4fe5178fc546bdea0 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 11:13:18 +0100
Subject: [PATCH 08/15] Update mbpp.py

---
 bigcode_eval/tasks/mbpp.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index cb403735f..c90f1358b 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -45,7 +45,7 @@ def __init__(self):
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
-        dataset = self.dataset["test"]
+        dataset = self.dataset
         # the wrong split of mbpp can be loaded with old datasets cache
         assert (
             len(dataset) == 500
@@ -70,11 +70,12 @@ def get_prompt(self, doc):
         MBPP prompt is built following to InCoder (Fried et al.) approach
         prompt = docstring that includes one test
         """
-        # description = doc["text"]
-        # test_example = doc["test_list"][0]
-        # prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
-        prompt_with_pseudocode = self.generate_prompt(doc)
-        prompt = prompt_with_pseudocode + "\n\n\nCode:\n\n"
+        description = doc["text"]
+        test_example = doc["test_list"][0]
+        #prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
+        prompt = f'"""{doc}\n\nCode:\n\n"""'
+        #prompt_with_pseudocode = self.generate_prompt(doc)
+        #prompt = prompt_with_pseudocode + "\n\n\nCode:\n\n"
 
 
         return prompt

From cbd2e668dfaee08ec30d28f2df84f20200c404de Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 15:21:35 +0100
Subject: [PATCH 09/15] Update mbpp.py

added the pseudocodes component
---
 bigcode_eval/tasks/mbpp.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index c90f1358b..d36a354c3 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -31,7 +31,7 @@ class MBPP(Task):
     answers, generation settings and evaluation methods.
     """
 
-    DATASET_PATH = "mbpp"
+    DATASET_PATH = "/mnt/roma/abhineet/mbpp_withpseudods"
 
     def __init__(self):
@@ -72,8 +72,9 @@ def get_prompt(self, doc):
         description = doc["text"]
         test_example = doc["test_list"][0]
-        #prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
-        prompt = f'"""{doc}\n\nCode:\n\n"""'
+        pseudocodes = doc["Pseudocodes"]
+        prompt = f'"""Task Description: \n\n{description}\n{test_example}{pseudocodes}\nCode:\n\n"""'
+
         #prompt_with_pseudocode = self.generate_prompt(doc)
         #prompt = prompt_with_pseudocode + "\n\n\nCode:\n\n"
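PATCH 09 assumes a local copy of MBPP that already carries a "Pseudocodes" column. Such a dataset could be prepared roughly as follows; the generator function and the output path are placeholders, not the author's actual pipeline:

# Rough sketch of building a pseudocode-augmented MBPP copy for PATCH 09.
from datasets import load_dataset

def generate_pseudocode(text):
    # Placeholder: the real pipeline would query the fine-tuned model here.
    return "\n\nPseudocode:\n\n1. ...\n"

def add_pseudocode(example):
    example["Pseudocodes"] = generate_pseudocode(example["text"])
    return example

mbpp_test = load_dataset("mbpp", split="test")
mbpp_test.map(add_pseudocode).save_to_disk("mbpp_withpseudods")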
From 4594ec1a8606e2de4553dd11f609e086026c921b Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 15:30:39 +0100
Subject: [PATCH 10/15] Update base.py

---
 bigcode_eval/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/base.py b/bigcode_eval/base.py
index a66b2ff56..ce376d482 100644
--- a/bigcode_eval/base.py
+++ b/bigcode_eval/base.py
@@ -25,7 +25,7 @@ def __init__(self, stop_words=None, requires_execution=True):
         self.stop_words = stop_words
         self.requires_execution = requires_execution
         try:
-            self.dataset = load_from_disk(path=self.DATASET_PATH)
+            self.dataset = load_from_disk(self.DATASET_PATH)
         except Exception as e:
             warn(
                 f"Loading the dataset failed with {str(e)}. This task will use a locally downloaded dataset, not from the HF hub. \

From 073cfcf4c9e4843b02886f893884f5ac18e4cae5 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 15:53:59 +0100
Subject: [PATCH 11/15] Update mbpp.py

removed the "test" split lookup, since we now pass the test dataset directly as input
---
 bigcode_eval/tasks/mbpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index d36a354c3..a6248b9c5 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -93,7 +93,7 @@ def postprocess_generation(self, generation, idx):
             index of doc in the dataset to which the generation belongs
         """
-        prompt = self.get_prompt(self.dataset["test"][idx])
+        prompt = self.get_prompt(self.dataset[idx])
         start_idx = generation.find("Code:")
         generation = generation[start_idx + len("Code:"):]
         print(generation)

From d34ef07a4a86f00d31dc917ac5e2a4dcdeb0859d Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 16:09:25 +0100
Subject: [PATCH 12/15] Update mbpp.py

---
 bigcode_eval/tasks/mbpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index a6248b9c5..a89286463 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -73,7 +73,7 @@ def get_prompt(self, doc):
         description = doc["text"]
         test_example = doc["test_list"][0]
         pseudocodes = doc["Pseudocodes"]
-        prompt = f'"""Task Description: \n\n{description}\n{test_example}{pseudocodes}\nCode:\n\n"""'
+        prompt = f'"""Task Description: \n\n{description}\n{test_example}{pseudocodes}"""\nCode:\n\n'
 
         #prompt_with_pseudocode = self.generate_prompt(doc)

From 0391732e42868810d9ace193a37abaa08a46eda3 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 20:15:39 +0100
Subject: [PATCH 13/15] Update base.py

---
 bigcode_eval/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bigcode_eval/base.py b/bigcode_eval/base.py
index ce376d482..9468b98cb 100644
--- a/bigcode_eval/base.py
+++ b/bigcode_eval/base.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from warnings import warn
 
-from datasets import load_dataset, load_from_disk
+from datasets import load_dataset
 
 
 class Task(ABC):
@@ -25,7 +25,7 @@ def __init__(self, stop_words=None, requires_execution=True):
         self.stop_words = stop_words
         self.requires_execution = requires_execution
         try:
-            self.dataset = load_from_disk(self.DATASET_PATH)
+            self.dataset = load_dataset(path=self.DATASET_PATH, name=self.DATASET_NAME)
         except Exception as e:
             warn(
                 f"Loading the dataset failed with {str(e)}. This task will use a locally downloaded dataset, not from the HF hub. \
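The toggling between self.dataset["test"][idx] and self.dataset[idx] across PATCHES 08, 11, and 13 comes down to the return types of the two loaders, illustrated below with placeholder paths:

# load_dataset returns a DatasetDict keyed by split name, while
# load_from_disk on a single saved split returns a bare Dataset.
from datasets import load_dataset, load_from_disk

hub_ds = load_dataset("mbpp")                   # DatasetDict
doc_a = hub_ds["test"][0]                       # split key required

local_ds = load_from_disk("mbpp_withpseudods")  # Dataset (one split saved)
doc_b = local_ds[0]                             # indexed directly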
\

From 960f3f6de2427d14d707169dd6c66422d5f1b30c Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 20:17:51 +0100
Subject: [PATCH 14/15] Update mbpp.py

---
 bigcode_eval/tasks/mbpp.py | 38 +++++---------------------------------
 1 file changed, 5 insertions(+), 33 deletions(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index a89286463..ccaf1b596 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -12,9 +12,6 @@
 from bigcode_eval.base import Task
 from bigcode_eval.tasks.custom_metrics.code_eval import compute_code_eval
 
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-
 
 _CITATION = """
 @article{austin2021program,
@@ -31,40 +28,23 @@ class MBPP(Task):
     answers, generation settings and evaluation methods.
     """
 
-    DATASET_PATH = "/mnt/roma/abhineet/mbpp_withpseudods"
+    DATASET_PATH = "mbpp"
 
     def __init__(self):
         super().__init__(
             stop_words=["\nclass", "\nassert", '\n"""', "\nprint", "\nif", "\n<|/", "\n```"],
             requires_execution=True,
         )
-        checkpoint = "/mnt/roma/abhineet/output_dir_p/checkpoint-5640"
-        self.model = AutoModelForCausalLM.from_pretrained(checkpoint)
-        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
     def get_dataset(self):
         """Returns dataset for the task or an iterable of any object, that get_prompt can handle"""
-        dataset = self.dataset
+        dataset = self.dataset["test"]
         # the wrong split of mbpp can be loaded with old datasets cache
         assert (
             len(dataset) == 500
         ), "please ensure you have the latest version of MBPP dataset, try deleting its old cache"
         return dataset
 
-    def generate_prompt(self, doc):
-        description = doc["text"]
-        test_example = doc["test_list"][0]
-        prompt = f'"""Task Description: \n\n{description}\n{test_example}\n\nPseudocode:\n\n"""'
-        self.model.to(self.device)
-        inputs = self.tokenizer.encode(prompt, return_tensors = "pt").to(self.device)
-        outputs = self.model.generate(inputs, max_new_tokens = 300)
-        output = self.tokenizer.decode(outputs[0])
-        start_idx = output.find("Code:")
-        return output[:start_idx]
-
-
     def get_prompt(self, doc):
         """Builds the prompt for the LM to generate from.
         MBPP prompt is built following to InCoder (Fried et al.) approach
         prompt = docstring that includes one test
         """
         description = doc["text"]
         test_example = doc["test_list"][0]
-        pseudocodes = doc["Pseudocodes"]
-        prompt = f'"""Task Description: \n\n{description}\n{test_example}{pseudocodes}"""\nCode:\n\n'
-
-        #prompt_with_pseudocode = self.generate_prompt(doc)
-        #prompt = prompt_with_pseudocode + "\n\n\nCode:\n\n"
-
+        prompt = f'"""\n{description}\n{test_example}\n"""\n'
         return prompt
 
     def get_reference(self, doc):
@@ -93,10 +67,8 @@ def postprocess_generation(self, generation, idx):
         :param idx: int
             index of doc in the dataset to which the generation belongs
         """
-        prompt = self.get_prompt(self.dataset[idx])
-        start_idx = generation.find("Code:")
-        generation = generation[start_idx + len("Code:"):]
-        print(generation)
+        prompt = self.get_prompt(self.dataset["test"][idx])
+        generation = generation[len(prompt) :]
         return prompt + self._stop_at_stop_token(generation, self.stop_words)
 
     def process_results(self, generations, references):

From 2de027bebeaf9984d18919830883bdd8cc059a44 Mon Sep 17 00:00:00 2001
From: Abhineetsoccer <65859971+Abhineetsoccer@users.noreply.github.com>
Date: Wed, 14 Aug 2024 20:29:55 +0100
Subject: [PATCH 15/15] Update mbpp.py

for base eval
---
 bigcode_eval/tasks/mbpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigcode_eval/tasks/mbpp.py b/bigcode_eval/tasks/mbpp.py
index ccaf1b596..2210c16cc 100644
--- a/bigcode_eval/tasks/mbpp.py
+++ b/bigcode_eval/tasks/mbpp.py
@@ -52,7 +52,7 @@ def get_prompt(self, doc):
         """
         description = doc["text"]
         test_example = doc["test_list"][0]
-        prompt = f'"""\n{description}\n{test_example}\n"""\n'
+        prompt = f'"""\nText Description:\n\n{description}\n{test_example}\n"""\n\nCode:\n\n'
         return prompt
 
     def get_reference(self, doc):
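After PATCH 15 the task is back to a single-pass prompt, now with explicit "Text Description:" and "Code:" sections. A minimal sketch of the final prompt and the restored postprocessing follows; the document fields, the fake generation, and the inline stop-word truncation (standing in for _stop_at_stop_token) are all illustrative:

# End-to-end sketch of the PATCH 15 prompt plus PATCH 14's postprocessing.
description = "Write a function to add two numbers."
test_example = "assert add(1, 2) == 3"
prompt = f'"""\nText Description:\n\n{description}\n{test_example}\n"""\n\nCode:\n\n'

# Pretend the model echoed the prompt and kept writing:
generation = prompt + "def add(a, b):\n    return a + b\nassert add(1, 2) == 3"

completion = generation[len(prompt):]           # strip the echoed prompt
stop_words = ["\nclass", "\nassert", '\n"""', "\nprint", "\nif"]
cut = min((completion.find(s) for s in stop_words if s in completion),
          default=len(completion))
print(prompt + completion[:cut])                # truncated at "\nassert"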