from api_wrappers import grazie_wrapper


def build_prompt_ref(prediction, reference):
    """Build a prompt asking the LLM to rate a commit message against a reference message."""
    return f"""Evaluate the following commit message based on clarity, specificity, context, and conciseness without providing any additional feedback or commentary:

START OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
{prediction}
END OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE

For reference, consider this example of a good commit message for the same commit that is both concise and specific:

START OF THE REFERENCE COMMIT MESSAGE
{reference}
END OF THE REFERENCE COMMIT MESSAGE

YOUR TASK: Provide a single number as a response, representing the rating on a scale from 1 to 10, where 1 is the lowest quality and 10 is the highest quality. Do not include any other text or explanation in your response.
"""


def build_prompt_noref(prediction, diff):
    """Build a prompt asking the LLM to rate a commit message against the commit's diff."""
    return f"""Evaluate the following commit message based on clarity, specificity, context, and conciseness without providing any additional feedback or commentary:

START OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
{prediction}
END OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE

These are the code changes included in the commit:

START OF THE CODE CHANGES
{diff}
END OF THE CODE CHANGES

YOUR TASK: Provide a single number as a response, representing the rating on a scale from 1 to 10, where 1 is the lowest quality and 10 is the highest quality. Do not include any other text or explanation in your response.
"""


N_RETRIES = 3


def get_number_for_prompt(prompt):
    """Query the LLM and parse its answer as an integer rating, retrying up to N_RETRIES times."""
    outputs = []
    result = None
    for _ in range(N_RETRIES):
        try:
            # Take the last whitespace-separated token: models sometimes
            # prepend prose before the number.
            output = grazie_wrapper.generate_for_prompt(prompt).strip().split()[-1]
            outputs.append(output)
            result = int(output)
            break
        except (ValueError, IndexError):
            # ValueError: the last token is not an integer;
            # IndexError: the completion was empty. Retry either way.
            continue
    if result is None:
        raise RuntimeError(f"LLM could not generate a number. Its outputs were: {outputs}")
    return result


def compute_ref(prediction, reference, n_requests):
    """Average rating over n_requests reference-based evaluations."""
    prompt = build_prompt_ref(prediction, reference)
    results = [get_number_for_prompt(prompt) for _ in range(n_requests)]
    return sum(results) / len(results)


def compute_noref(prediction, diff, n_requests):
    """Average rating over n_requests diff-based evaluations."""
    prompt = build_prompt_noref(prediction, diff)
    results = [get_number_for_prompt(prompt) for _ in range(n_requests)]
    return sum(results) / len(results)
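

# Usage sketch (illustrative only): the sample commit message and diff below
# are hypothetical, and `grazie_wrapper` must be configured in the environment
# for this to actually run against the LLM.
if __name__ == "__main__":
    sample_prediction = "Fix off-by-one error in pagination logic"
    sample_diff = (
        "--- a/paginator.py\n"
        "+++ b/paginator.py\n"
        "@@ -10,7 +10,7 @@\n"
        "-    last_page = total // page_size\n"
        "+    last_page = (total + page_size - 1) // page_size\n"
    )
    # Averaging over several independent LLM ratings reduces sampling noise.
    print("diff-based score:", compute_noref(sample_prediction, sample_diff, n_requests=3))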