import argparse |
import json |
import os |
from pathlib import Path |
import torch |
from tqdm import tqdm |
# Directory named "data" located next to this script; the per-language
# ``humaneval-<lang>.jsonl`` problem files are looked up inside it.
data_abs_dir = Path(__file__).parent.joinpath("data")
from human_eval.evaluation import evaluate_functional_correctness |
from transformers import AutoModelForCausalLM, AutoTokenizer |
from utils.utils import extract_generation_code, languge_settings |
def build_deepseekcoder_instruction(languge: str, question: str) -> str:
    """Build the code-completion instruction sent to the chat model.

    Args:
        languge: Language name; it is lower-cased and used as the info
            string of the fenced code block.
        question: The code to be completed; surrounding whitespace is
            stripped before it is placed inside the code fence.

    Returns:
        The instruction sentence followed by a fenced code block that
        contains ``question``.
    """
    instruction = (
        "Please continue to complete the function. You are not allowed to "
        "modify the given code and do the completion only. Please return all "
        "completed function in a codeblock. Here is the given code to do "
        "completion:"
    )
    fence_tag = languge.lower()
    given_code = question.strip()
    return f"{instruction}\n```{fence_tag}\n{given_code}\n```"
def generate_one(example, lang, tokenizer, model):
    """Generate a completion for one problem and post-process it.

    Args:
        example: Problem record; its ``"prompt"`` entry is read and an
            ``"output"`` entry holding the decoded completion is written to
            it (the mapping is modified in place).
        lang: Language key used to index ``languge_settings`` and passed to
            ``extract_generation_code`` as ``lang_code``.
        tokenizer: Tokenizer providing ``apply_chat_template``,
            ``convert_tokens_to_ids`` and ``decode``.
        model: Model providing ``device`` and ``generate``.

    Returns:
        Whatever ``extract_generation_code(example, lang_code=lang)`` returns.

    Raises:
        AssertionError: If the ``<|EOT|>`` token does not map to an ``int`` id.
    """
    instruction = build_deepseekcoder_instruction(
        languge_settings[lang]["full_name"], example["prompt"]
    )
    conversation = [{"role": "user", "content": instruction}]
    input_ids = tokenizer.apply_chat_template(
        conversation,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(model.device)

    # "<|EOT|>" serves as both the end-of-sequence and the padding token.
    eot_id = tokenizer.convert_tokens_to_ids("<|EOT|>")
    assert isinstance(eot_id, int), "Invalid tokenizer, EOT id not found"

    generated = model.generate(
        input_ids,
        max_new_tokens=1024,
        do_sample=False,
        pad_token_id=eot_id,
        eos_token_id=eot_id,
    )

    # Drop the echoed prompt tokens so only the newly generated part is decoded.
    prompt_length = len(input_ids[0])
    completion_ids = generated[0][prompt_length:]
    example["output"] = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return extract_generation_code(example, lang_code=lang)
def generate_main(args):
    """Generate completions for every problem, save them, then evaluate.

    Steps: load the tokenizer and model named by ``args.model``, read the
    problems from ``humaneval-<language>.jsonl`` under ``data_abs_dir``, run
    ``generate_one`` on each problem, write the results as JSON lines to
    ``args.output_path`` and finally score that file with
    ``evaluate_functional_correctness``.

    Args:
        args: Namespace with the attributes ``model``, ``language``,
            ``output_path`` and ``temp_dir``.

    Returns:
        None. Progress and the evaluation result are printed.
    """
    model_name_or_path = args.model
    lang = args.language
    saved_path = args.output_path
    temp_dir = args.temp_dir
    os.makedirs(temp_dir, exist_ok=True)

    # Make sure the output location exists *before* the expensive generation
    # loop, so a missing directory cannot discard all generated results.
    saved_dir = os.path.dirname(saved_path)
    if saved_dir:
        os.makedirs(saved_dir, exist_ok=True)

    problem_file = os.path.join(data_abs_dir, f"humaneval-{lang}.jsonl")

    print("model", model_name_or_path)
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    print(
        "load tokenizer {} from {} over.".format(
            tokenizer.__class__, model_name_or_path
        )
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    model.eval()

    # Read through a context manager so the handle is closed, and decode as
    # UTF-8 to match the encoding used when writing the results below.
    with open(problem_file, encoding="utf-8") as fr:
        examples = [json.loads(line) for line in fr if line.strip()]
    print("Read {} examples for evaluation over.".format(len(examples)))

    generated_examples = [
        generate_one(ex, lang, tokenizer, model)
        for ex in tqdm(examples, desc="Generating")
    ]
    print("Generate all over!!!")

    with open(saved_path, "w", encoding="utf-8") as fw:
        for ex in generated_examples:
            fw.write(json.dumps(ex) + "\n")
    print(
        "Save {} processed examples into {} over!".format(
            len(generated_examples), saved_path
        )
    )

    result = evaluate_functional_correctness(
        input_file=saved_path,
        tmp_dir=temp_dir,
        n_workers=8,
        timeout=3.0,
        problem_file=problem_file,
        language=lang,
    )
    print(lang, result, model_name_or_path)
def evaluation_only(args):
    """Re-process an existing generation file and evaluate it.

    Reads the JSON-lines file at ``args.output_path``, passes every record
    through ``extract_generation_code``, writes the processed records to a
    file of the same base name inside ``args.temp_dir`` and scores that file
    with ``evaluate_functional_correctness``.

    Args:
        args: Namespace with the attributes ``language``, ``output_path`` and
            ``temp_dir``.

    Returns:
        None. The evaluation result is printed.

    Raises:
        FileNotFoundError: If ``args.output_path`` does not exist.
    """
    lang = args.language
    temp_dir = args.temp_dir

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if not os.path.exists(args.output_path):
        raise FileNotFoundError(
            "Not fond output file: {}".format(args.output_path)
        )
    os.makedirs(temp_dir, exist_ok=True)

    output_name = os.path.basename(args.output_path)
    # Context manager closes the handle; UTF-8 matches how results are written.
    with open(args.output_path, encoding="utf-8") as fr:
        output_examples = [json.loads(line) for line in fr if line.strip()]

    processed_examples = [
        extract_generation_code(ex, lang) for ex in tqdm(output_examples, "Processing")
    ]

    # NOTE: if ``temp_dir`` is the directory containing ``args.output_path``
    # this path equals the input file, which is then overwritten (the input
    # has already been read completely at this point).
    processed_path = os.path.join(temp_dir, output_name)
    with open(processed_path, "w", encoding="utf-8") as fw:
        for ex in processed_examples:
            fw.write(json.dumps(ex) + "\n")
    print(
        "Save {} processed examples into {} over!".format(
            len(processed_examples), processed_path
        )
    )

    problem_file = os.path.join(data_abs_dir, f"humaneval-{lang}.jsonl")
    from human_eval.evaluation import evaluate_functional_correctness

    result = evaluate_functional_correctness(
        input_file=processed_path,
        tmp_dir=temp_dir,
        n_workers=8,
        timeout=3.0,
        problem_file=problem_file,
        language=lang,
    )
    print(lang, result)
if __name__ == "__main__":
    # Script entry point: parse the command-line options, then run generation
    # followed by evaluation via ``generate_main``.
    parser = argparse.ArgumentParser()

    # (flag, help text, default) for every option; all options are strings.
    option_specs = (
        (
            "--model",
            "model name or path",
            "/data0/pretrained-models/deepseek-coder-6.7b-instruct",
        ),
        (
            "--output_path",
            "output path of your generation",
            "/home/qyhuang/DeepSeek-Coder/outputs/deepseek-chat.json",
        ),
        ("--language", "langauge", "python"),
        ("--temp_dir", "temp dir for evaluation", "tmp"),
    )
    for flag, help_text, default_value in option_specs:
        parser.add_argument(flag, type=str, help=help_text, default=default_value)

    args = parser.parse_args()

    # NOTE(review): presumably set to silence the Hugging Face tokenizers
    # parallelism/fork warning — confirm.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    generate_main(args)