Zekun Wu committed on
Commit
0eb1a66
·
1 Parent(s): 171bf3d
Files changed (2) hide show
  1. app.py +40 -2
  2. evaluator.py +2 -0
app.py CHANGED
@@ -14,6 +14,45 @@ examples = {
14
  }
15
  }
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # Function to check password
19
  def check_password():
@@ -62,7 +101,6 @@ else:
62
  eval = evaluator(model_name)
63
  scores = eval(question, explanation)
64
  st.write('### Scores')
65
- for principle, score in scores.items():
66
- st.write(f"{principle}: {score}")
67
  else:
68
  st.error('Please enter both a question and an explanation to evaluate.')
 
14
  }
15
  }
16
 
17
# Score cut-offs shared by every principle: a score at or above _HIGH_SCORE
# gets the first comment, at or above _MEDIUM_SCORE the second, else the third.
_HIGH_SCORE = 0.8
_MEDIUM_SCORE = 0.5

# Commentary text per principle, ordered (high, medium, low).
_COMMENTARY = {
    "Factually Correct": (
        "Excellent accuracy! The information is precise and directly relevant to the question.",
        "Moderately accurate, but some details may not be completely correct or are somewhat irrelevant.",
        "The explanation contains significant inaccuracies or irrelevant information.",
    ),
    "Useful": (
        "Highly useful! The explanation clearly enhances understanding and aids in further reasoning or decision-making.",
        "Somewhat useful, though it could be more insightful or practical in aiding understanding.",
        "The explanation does little to help understand or apply the information provided.",
    ),
    "Context Specific": (
        "Perfectly tailored to the context of the question, addressing the specific scenario effectively.",
        "Generally addresses the context, but may miss specific details or nuances relevant to the question.",
        "Fails to address the context of the question, lacking relevance or specificity.",
    ),
    "User Specific": (
        "The explanation is well-adapted to the user's knowledge level and interests, demonstrating thoughtfulness.",
        "Moderately considerate of the user's knowledge level, but could be more tailored.",
        "Does not consider the user's background or interests, potentially leading to confusion or disinterest.",
    ),
    "Provides Pluralism": (
        "Provides an excellent range of perspectives or interpretations, fostering a comprehensive understanding.",
        "Offers some alternative perspectives, but more could be provided to enrich understanding.",
        "Lacks diversity in viewpoints, limiting the depth of exploration into the topic.",
    ),
}


def _commentary_for(principle, score):
    """Return the commentary for *principle* at *score*, or None if unknown."""
    tiers = _COMMENTARY.get(principle)
    if tiers is None:
        return None
    high, medium, low = tiers
    if score >= _HIGH_SCORE:
        return high
    if score >= _MEDIUM_SCORE:
        return medium
    return low


def write_evaluation_commentary(scores):
    """Write one line per principle with its score and a qualitative comment.

    Args:
        scores: Mapping of principle name to a numeric score. Each score is
            compared against 0.8 and 0.5 to pick the comment.

    Each known principle is written via ``st.write`` as
    ``"<principle> (<score>): <comment>"``. A principle with no commentary
    entry is written as ``"<principle>: <score>"`` instead of raising
    ``UnboundLocalError`` or repeating the previous principle's comment.
    """
    for principle, score in scores.items():
        comment = _commentary_for(principle, score)
        if comment is None:
            # No commentary defined for this principle: still show the score.
            st.write(f"{principle}: {score}")
        else:
            st.write(f"{principle} ({score}): {comment}")
56
 
57
  # Function to check password
58
  def check_password():
 
101
  eval = evaluator(model_name)
102
  scores = eval(question, explanation)
103
  st.write('### Scores')
104
+ write_evaluation_commentary(scores)
 
105
  else:
106
  st.error('Please enter both a question and an explanation to evaluate.')
evaluator.py CHANGED
@@ -75,6 +75,8 @@ class evaluator:
75
 
76
  return self.validate_scores(scores)
77
 
 
 
78
  if __name__ == '__main__':
79
  eval = evaluator()
80
  question = "What is the capital of France?"
 
75
 
76
  return self.validate_scores(scores)
77
 
78
+
79
+
80
  if __name__ == '__main__':
81
  eval = evaluator()
82
  question = "What is the capital of France?"