oceansweep committed on
Commit c96320d
1 Parent(s): cd5e862

Upload Confabulation_check.py

App_Function_Libraries/Confabulation_check.py ADDED
@@ -0,0 +1,81 @@
+ # Confabulation_check.py
+ #
+ # This file contains functions used to check a summary for confabulation against the original transcript.
+ #
+ #
+ # Imports
+ #
+ # External Imports
+ #
+ # Local Imports
+ #
+ #
+ ####################################################################################################
+ #
+ # Functions:
+ from App_Function_Libraries.Chat import chat_api_call
+ from App_Function_Libraries.ms_g_eval import validate_inputs, detailed_api_error
+
+
+ def simplified_geval(transcript: str, summary: str, api_name: str, api_key: str, temp: float = 0.7) -> str:
+     """
+     Perform a simplified version of G-Eval using a single query to evaluate the summary.
+
+     Args:
+         transcript (str): The original transcript
+         summary (str): The summary to be evaluated
+         api_name (str): The name of the LLM API to use
+         api_key (str): The API key for the chosen LLM
+         temp (float, optional): The temperature parameter for the API call. Defaults to 0.7.
+
+     Returns:
+         str: The evaluation result
+     """
+     try:
+         validate_inputs(transcript, summary, api_name, api_key)
+     except ValueError as e:
+         return str(e)
+
+     prompt = f"""You are an AI assistant tasked with evaluating the quality of a summary. You will be given an original transcript and a summary of that transcript. Your task is to evaluate the summary based on the following criteria:
+
+ 1. Coherence (1-5): How well-structured and organized is the summary?
+ 2. Consistency (1-5): How factually aligned is the summary with the original transcript?
+ 3. Fluency (1-3): How well-written is the summary in terms of grammar, spelling, and readability?
+ 4. Relevance (1-5): How well does the summary capture the important information from the transcript?
+
+ Please provide a score for each criterion and a brief explanation for your scoring. Then, give an overall assessment of the summary's quality.
+
+ Original Transcript:
+ {transcript}
+
+ Summary to Evaluate:
+ {summary}
+
+ Please provide your evaluation in the following format:
+ Coherence: [score] - [brief explanation]
+ Consistency: [score] - [brief explanation]
+ Fluency: [score] - [brief explanation]
+ Relevance: [score] - [brief explanation]
+
+ Overall Assessment: [Your overall assessment of the summary's quality]
+ """
+
+     try:
+         result = chat_api_call(
+             api_name,
+             api_key,
+             prompt,
+             "",
+             temp=temp,
+             system_message="You are a helpful AI assistant tasked with evaluating summaries."
+         )
+     except Exception as e:
+         return detailed_api_error(api_name, e)
+
+     formatted_result = f"""
+ Confabulation Check Results:
+
+ {result}
+ """
+
+     return formatted_result
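
For reference, a minimal usage sketch of the new function (not part of the commit). The provider name, API key, and sample text below are placeholders; the accepted api_name values depend on how chat_api_call routes providers in App_Function_Libraries.Chat.

from App_Function_Libraries.Confabulation_check import simplified_geval

transcript = "Speaker A: The budget review is scheduled for 3 PM on Thursday."
summary = "The budget review is scheduled for 3 PM on Friday."

# Placeholder provider and key; substitute whatever chat_api_call actually accepts.
report = simplified_geval(
    transcript=transcript,
    summary=summary,
    api_name="openai",
    api_key="sk-...",
    temp=0.3,  # a lower temperature keeps the scoring more deterministic
)
print(report)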