|
import time |
|
|
|
from api_wrappers import grazie_wrapper |
|
|
|
|
|
def build_prompt(prediction, reference):
    """Build the prompt asking the model to rate a generated commit message.

    The prompt asks for a 1-5 rating that favours concise messages, embeds
    *prediction* as the message under evaluation and *reference* as an ideal
    message for the same commit, and ends with the ``OUTPUT`` token after
    which the model is told to print a single integer.

    Args:
        prediction: The generated commit message to be rated.
        reference: The reference commit message for the same commit.

    Returns:
        The complete prompt text, terminated by ``"OUTPUT\\n"``.
    """
    # One entry per prompt line; empty entries are the blank separator lines.
    prompt_lines = (
        "Your task is to rate the quality of the generated commit message using the scale from 1 to 5.",
        "",
        "A good commit message has to be concise.",
        "Assign lower scores for the commit messages that are too verbose for a commit message.",
        "",
        "The generated commit message you have to evaluate:",
        "START OF THE GENERATED COMMIT MESSAGE",
        f"{prediction}",
        "END OF THE GENERATED COMMIT MESSAGE",
        "",
        "Here is an example of an ideal reference commit message for the same commit:",
        "START OF THE REFERENCE COMMIT MESSAGE",
        f"{reference}",
        "END OF THE REFERENCE COMMIT MESSAGE",
        "",
        "All the information in the reference commit message is true.",
        "",
        'Print only one integer number after the token "OUTPUT" - the rating of the generated commit message.',
        "Do not print anything that is not an integer.",
        "",
        "OUTPUT",
    )
    # The prompt ends with a newline right after the OUTPUT token.
    return "\n".join(prompt_lines) + "\n"
|
|
|
|
|
# Total number of attempts compute() makes to obtain a parsable rating
# before it gives up and raises (it loops over range(N_RETRIES)).
N_RETRIES = 3
|
|
|
|
|
def _parse_rating(raw_output):
    """Return the 1-5 rating that ends *raw_output*, or ``None`` if there is none.

    Only a single trailing digit in the range 1-5 is accepted, matching the
    scale requested in the prompt. A trailing digit that is part of a longer
    number (e.g. ``"15"``) is rejected rather than truncated to its last digit.
    """
    text = raw_output.strip()
    if not text:
        # Empty or whitespace-only response: nothing to parse.
        return None
    last_char = text[-1]
    preceding = text[-2:-1]  # "" when the response is a single character
    if last_char not in "12345" or preceding.isdigit():
        return None
    return int(last_char)


def compute(prediction, reference):
    """Ask the model to rate *prediction* against *reference* on a 1-5 scale.

    The prompt from ``build_prompt`` is sent up to ``N_RETRIES`` times; the
    first response that ends with a valid rating wins.

    Args:
        prediction: The generated commit message to be rated.
        reference: The reference commit message for the same commit.

    Returns:
        The rating as an ``int`` between 1 and 5 inclusive.

    Raises:
        RuntimeError: If no attempt produced a valid rating. The message
            lists the raw responses that were received.
    """
    prompt = build_prompt(prediction, reference)
    outputs = []

    for _ in range(N_RETRIES):
        try:
            raw_output = grazie_wrapper.generate_for_prompt(prompt)
        except ValueError:
            # Kept from the original behaviour: a ValueError during the call
            # counts as a failed attempt.
            # NOTE(review): confirm whether the wrapper can actually raise it.
            continue

        # Record the whole response (not just its last character) so the
        # final error message is useful for debugging.
        outputs.append(raw_output)

        rating = _parse_rating(raw_output)
        if rating is not None:
            return rating

    raise RuntimeError(f"GPT4 cannot generate a number. Its outputs were: {str(outputs)}")
|
|