maxspad committed
Commit 7e4e512 • 1 Parent(s): a002819

Blurb added; Q3 now depends on Q2: if Q2 finds no suggestion, Q3 is automatically set to no link

Files changed (2)
  1. app.py +11 -5
  2. overview.py +44 -3
app.py CHANGED
@@ -3,8 +3,6 @@ import transformers as tf
 import pandas as pd
 
 from overview import NQDOverview
-from fullreport import NQDFullReport
-
 
 # Function to load and cache models
 @st.experimental_singleton(show_spinner=False)
@@ -33,9 +31,13 @@ def run_models(model_names, models, c):
 
 st.title('Assess the *QuAL*ity of your feedback')
 st.caption(
-    """Medical education *requires* high-quality feedback, but evaluating feedback
-    is difficult and time-consuming. This tool uses NLP/ML to predict a validated
-    feedback quality metric known as the QuAL Score. *Try it for yourself!*
+    """Medical education requires high-quality *written* feedback,
+    but evaluating these *supervisor narrative comments* is time-consuming.
+    The QuAL score has validity evidence for measuring the quality of short
+    comments in this context. We developed an NLP/ML-powered tool to
+    assess written comment quality via the QuAL score with high accuracy.
+
+    *Try it for yourself!*
     """)
 
 ### Load models
@@ -83,6 +85,10 @@ with st.form('comment_form'):
         st.experimental_rerun()
 
 results = run_models(models_to_load, models, st.session_state['comment'])
+# Modify results to sum the QuAL score, ignoring Q3 if Q2 found no suggestion
+if results['q2i']['label'] == 1:
+    results['q3i']['label'] = 1  # can't have a connection if there is no suggestion
+results['qual']['label'] = results['q1']['label'] + (not results['q2i']['label']) + (not results['q3i']['label'])
 
 overview = NQDOverview(st, results)
 overview.draw()
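The post-processing added here is terse, so a minimal standalone sketch may help. It assumes the label conventions visible in the diff: `q1` is the 0-3 evidence subscore, while `q2i` and `q3i` are inverted binary labels where 1 means "no" (consistent with the metric rendering in overview.py below). The `postprocess` helper name is hypothetical:

```python
# Minimal sketch of the post-processing step above, assuming the label
# conventions visible in the diff: q1 is 0-3 (evidence), while q2i and q3i
# are inverted binary labels where 1 means "no" (no suggestion / no link).

def postprocess(results: dict) -> dict:
    """Force Q3 to 'no link' when Q2 found no suggestion, then sum the QuAL score."""
    if results['q2i']['label'] == 1:      # Q2: no suggestion given
        results['q3i']['label'] = 1      # a link is impossible without a suggestion
    results['qual']['label'] = (
        results['q1']['label']           # evidence subscore, 0-3
        + (not results['q2i']['label'])  # +1 if a suggestion was given
        + (not results['q3i']['label'])  # +1 if the suggestion was linked
    )
    return results

# Example: full evidence, suggestion present, but the model found no link
example = {'q1': {'label': 3}, 'q2i': {'label': 0}, 'q3i': {'label': 1}}
assert postprocess(example)['qual']['label'] == 4

# Example: no suggestion, so the link subscore is forced to "no" as well
example = {'q1': {'label': 2}, 'q2i': {'label': 1}, 'q3i': {'label': 0}}
assert postprocess(example)['qual']['label'] == 2
```

The summation works because a Python `not` yields a `bool`, and `True` coerces to 1 in arithmetic, which is how the one-liner in the diff produces the 0-5 QuAL score.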
overview.py CHANGED
@@ -1,6 +1,44 @@
 from matplotlib.cm import get_cmap
 import plotly.graph_objects as go
 
+about_blurb = '''
+### About the QuAL Score
+
+The Quality of Assessment for Learning score (QuAL score)
+was created to evaluate short qualitative comments that are related to specific
+scores entered into a workplace-based assessment,
+common within the competency-based medical education (CBME) context.
+
+It is rated on a scale of 0-5, with 0 signifying very low quality and 5 very high quality.
+It consists of three subscores which are summed to calculate the overall QuAL score:
+
+1. Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)
+2. Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)
+3. Connection - Is the rater's suggestion linked to the behavior described? (0-no/1-yes)
+
+The QuAL score has validity evidence for accurately measuring the quality of evaluation comments in CBME.
+
+For more information, see the paper [here](https://doi.org/10.1080/10401334.2019.1708365).
+
+### About this Tool
+
+The QuAL score accurately rates the quality of narrative comments in CBME, but
+it still requires time-consuming manual rating. With the large volumes of text generated in a
+typical CBME program, large-scale assessment of comment quality is impractical.
+This tool uses machine learning (ML) and natural language processing (NLP) to automate
+rating narrative comments with the QuAL score.
+
+We trained a separate machine learning model for each of the three subscores described above.
+The resulting models are accurate:
+1. Evidence - Balanced accuracy of 61.5% for a 0-3 result, within-one accuracy of 96.4%
+2. Suggestion - Accuracy of 85%, sensitivity for lack of suggestion 86.2%
+3. Connection - Accuracy of 82%, sensitivity for lack of connection 90%
+
+The models are highly accurate, but not perfect! You may encounter cases where
+the results are not consistent with your interpretation of the text. If you do, please
+leave us [feedback](https://forms.gle/PfXxcGmvLYvd9jWz5). This tool is intended as a demonstration only
+and should not be used for high-stakes assessment (yet!).
+'''
 class NQDOverview(object):
     def __init__(self, parent, results,
                  dial_cmap='RdYlGn'):
@@ -32,6 +70,9 @@ class NQDOverview(object):
     def draw(self):
         st = self.p
 
+        with st.expander('About the QuAL Score and this Tool', expanded=False):
+            st.markdown(about_blurb)
+
         fig = self._build_figure()
 
         cols = st.columns([7, 3])
@@ -48,7 +89,7 @@ class NQDOverview(object):
         elif q1lab == 3:
             md_str = '😁 High'
         cols[1].metric('Level of Detail', md_str,
-            help='How specific was the evaluator in describing the behavior?')
+            help='Q1 - Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)')
 
         q2lab = self.results['q2i']['label']
         if q2lab == 0:
@@ -56,7 +97,7 @@ class NQDOverview(object):
         else:
             md_str = '❌ No'
         cols[1].metric('Suggestion Given', (md_str),
-            help='Did the evaluator give a suggestion for improvement?')
+            help='Q2 - Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)')
 
         q3lab = self.results['q3i']['label']
         if q3lab == 0:
@@ -64,4 +105,4 @@ class NQDOverview(object):
         else:
             md_str = '❌ No'
         cols[1].metric('Suggestion Linked', md_str,
-            help='Is the suggestion for improvement linked to the described behavior?')
+            help='Q3 - Connection - Is the rater’s suggestion linked to the behavior described? (0-no/1-yes)')
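For completeness, a hypothetical smoke test showing how `NQDOverview` is driven, mirroring the call site in app.py. The shape of `mock_results` is an assumption inferred from the keys the diff accesses (`q1`, `q2i`, `q3i`, and presumably `qual` for the dial figure):

```python
# smoke_test.py - run with: streamlit run smoke_test.py
# Assumes overview.py (above) is on the import path; the results shape is
# inferred from the diff, not confirmed by the rest of the source.
import streamlit as st
from overview import NQDOverview

mock_results = {
    'q1':   {'label': 2},  # evidence: "somewhat" (0-3 scale)
    'q2i':  {'label': 0},  # inverted label: 0 means a suggestion WAS given
    'q3i':  {'label': 0},  # inverted label: 0 means the suggestion WAS linked
    'qual': {'label': 4},  # 2 + 1 + 1, per the summation added in app.py
}

overview = NQDOverview(st, mock_results)
overview.draw()  # renders the expander blurb, the dial figure, and the three metric tiles
```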