Spaces:

oceansweep
/

tldw

Running

File size: 2,773 Bytes

43cd37c

# Confabulation_check.py
#
# This file contains functions that check a summary for confabulation against its original transcript.
#
#
# Imports
#
# External Imports
#
# Local Imports
#
#
####################################################################################################
#
# Functions:
from App_Function_Libraries.Chat import chat_api_call
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import validate_inputs, detailed_api_error


def simplified_geval(transcript: str, summary: str, api_name: str, api_key: str, temp: float = 0.7) -> str:
    """
    Evaluate a summary against its transcript with a single G-Eval-style LLM query.

    The arguments are first passed to ``validate_inputs``. If that succeeds, one
    prompt asking for Coherence, Consistency, Fluency and Relevance scores plus
    an overall assessment is sent through ``chat_api_call``, and the reply is
    wrapped under a "Confabulation Check Results:" heading.

    Args:
        transcript (str): The original transcript
        summary (str): The summary to be evaluated
        api_name (str): The name of the LLM API to use
        api_key (str): The API key for the chosen LLM
        temp (float, optional): The temperature parameter for the API call. Defaults to 0.7.

    Returns:
        str: The formatted evaluation on success. Failures are reported through
        the return value instead of being raised: the message of the
        ``ValueError`` raised by ``validate_inputs``, or whatever
        ``detailed_api_error`` produces for an exception raised by the API call.
    """
    # Guard clause: bad arguments never reach the API; the validation message
    # itself becomes the return value.
    try:
        validate_inputs(transcript, summary, api_name, api_key)
    except ValueError as validation_error:
        return str(validation_error)

    # NOTE: this text is sent verbatim to the model, blank lines included.
    evaluation_prompt = f"""You are an AI assistant tasked with evaluating the quality of a summary. You will be given an original transcript and a summary of that transcript. Your task is to evaluate the summary based on the following criteria:



1. Coherence (1-5): How well-structured and organized is the summary?

2. Consistency (1-5): How factually aligned is the summary with the original transcript?

3. Fluency (1-3): How well-written is the summary in terms of grammar, spelling, and readability?

4. Relevance (1-5): How well does the summary capture the important information from the transcript?



Please provide a score for each criterion and a brief explanation for your scoring. Then, give an overall assessment of the summary's quality.



Original Transcript:

{transcript}



Summary to Evaluate:

{summary}



Please provide your evaluation in the following format:

Coherence: [score] - [brief explanation]

Consistency: [score] - [brief explanation]

Fluency: [score] - [brief explanation]

Relevance: [score] - [brief explanation]



Overall Assessment: [Your overall assessment of the summary's quality]

"""

    evaluator_role = "You are a helpful AI assistant tasked with evaluating summaries."

    # Any failure raised by the API call is converted into an error string.
    # NOTE(review): the fourth positional argument is passed as an empty
    # string; its meaning depends on chat_api_call's signature -- confirm.
    try:
        llm_reply = chat_api_call(api_name, api_key, evaluation_prompt, "", temp=temp, system_message=evaluator_role)
    except Exception as api_error:
        return detailed_api_error(api_name, api_error)

    # The surrounding whitespace and indentation are part of the returned text.
    return f"""

    Confabulation Check Results:



    {llm_reply}

    """