nlp-qual-space / overview.py
maxspad's picture
linearized the gauge
8e11190
from matplotlib.cm import get_cmap
import plotly.graph_objects as go
import hydralit_components as hc
# Markdown shown inside the "About" expander of the overview panel.
# Blank lines separate headings, paragraphs and lists so that the text after
# each numbered list is not absorbed into the last list item when rendered.
about_blurb = '''
### About the QuAL Score

The Quality of Assessment for Learning score (QuAL score),
was created to evaluate short qualitative comments that are related to specific
scores entered into a workplace-based assessment,
common within the competency-based medical education (CBME) context.

It is rated on a scale of 0-5, with 0 signifying very low quality and 5 very high quality.
It consists of three subscores which are summed to calculate the overall QuAL score:

1. Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)
2. Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)
3. Connection - Is the rater's suggestion linked to the behavior described? (0-no/1-yes)

The QuAL score has validity evidence for accurately measuring the quality of evaluation comments in CBME.

For more information, see the paper [here](https://doi.org/10.1080/10401334.2019.1708365).

### About this Tool

The QuAL score accurately rates the quality of narrative comments in CBME, but
it still requires time-consuming manual rating. With large volumes of text generated in a
typical CBME program, large-scale assessment of comment quality is impractical.
This tool uses machine learning (ML) and natural language processing (NLP) to automate
the rating of the QuAL score on narrative comments.

We trained a machine learning model to predict each of the three subscores described above.
The resulting models are accurate:

1. Evidence - Balanced accuracy of 61.5% for a 0-3 result, within-one accuracy of 96.4%
2. Suggestion - Accuracy of 85%, sensitivity for lack of suggestion 86.2%
3. Connection - Accuracy of 82%, sensitivity for lack of connection 90%

The models are highly accurate, but not perfect! You may experience times where
the results are not consistent with your interpretation of the text. If you do, please
leave us [feedback](https://forms.gle/PfXxcGmvLYvd9jWz5). This tool is intended as a demonstration only
and should not be used for high-stakes assessment (yet!).
'''
class NQDOverview(object):
    """Overview panel for one set of QuAL prediction results.

    The panel consists of an "About" expander, a bullet gauge showing the
    overall QuAL label, and three metrics for the Q1/Q2/Q3 subscores.

    ``results`` is read as a mapping with the keys ``'qual'``, ``'q1'``,
    ``'q2i'`` and ``'q3i'``; each entry provides a ``'label'`` and, for
    :meth:`get_prog_setup`, a ``'scores'`` container indexable by that label.
    Labels are looked up in dictionaries, so they must be hashable and compare
    equal to plain ints (``int`` and NumPy integer scalars both qualify).
    """

    # Hex colour associated with each overall QuAL label (0-5).
    _QUAL_COLORS = {
        0: '#ffffff',
        1: '#dc3545',
        2: '#f60',
        3: '#ffc107',
        4: '#6ea728',
        5: '#28a745',
    }

    # Text shown for each Q1 (evidence) label. The emoji are written as
    # escapes so the source stays ASCII-safe regardless of file encoding.
    _EVIDENCE_TEXT = {
        0: '\U0001F625 None',    # disappointed-but-relieved face
        1: '\U0001F610 Low',     # neutral face
        2: '\U0001F60A Medium',  # smiling face with smiling eyes
        3: '\U0001F601 High',    # grinning face with smiling eyes
    }

    _YES_TEXT = '\u2705 Yes'  # white heavy check mark
    _NO_TEXT = '\u274C No'    # cross mark

    # One coloured band per integer label; each band is centred on its label.
    _GAUGE_STEPS = (
        (-0.5, 0.5, 'maroon'),
        (0.5, 1.5, 'indianred'),
        (1.5, 2.5, 'orange'),
        (2.5, 3.5, 'gold'),
        (3.5, 4.5, 'lightgreen'),
        (4.5, 5.5, 'green'),
    )

    def __init__(self, parent, results,
                 dial_cmap='RdYlGn'):
        """Store the drawing target and results, and resolve the colormap.

        Args:
            parent: Object exposing the Streamlit drawing API used by
                :meth:`draw` (``expander``, ``markdown``, ``plotly_chart``,
                ``columns``, ``metric``).
            results: Prediction results mapping (see the class docstring).
            dial_cmap: Name of a matplotlib colormap.
        """
        self.p = parent
        self.results = results
        # NOTE(review): matplotlib.cm.get_cmap is deprecated in recent
        # matplotlib releases, and self.cmap is not read anywhere in this
        # class. Kept so the constructor's behaviour is unchanged.
        self.cmap = get_cmap(dial_cmap)

    def _get_color(self):
        """Return the hex colour for the overall QuAL label.

        Raises:
            ValueError: If the label is not one of 0-5.
        """
        lab = self.results['qual']['label']
        try:
            return self._QUAL_COLORS[lab]
        except KeyError:
            raise ValueError(
                f'Unexpected QuAL label {lab!r}; expected an integer 0-5'
            ) from None

    def _build_figure(self):
        """Build the bullet-gauge figure for the overall QuAL label."""
        steps = [
            {'range': [low, high], 'color': color}
            for low, high, color in self._GAUGE_STEPS
        ]
        indicator = go.Indicator(
            mode="number+gauge",
            value=self.results['qual']['label'],
            domain={'x': [0.1, 1], 'y': [0, 1]},
            title={'text': "<b>QuAL:</b>"},
            gauge={
                'shape': "bullet",
                # Half a unit of padding so the 0 and 5 bands are full width.
                'axis': {'range': [-0.5, 5.5]},
                'steps': steps,
                'bar': {
                    'color': 'rgba(123, 123, 123, 0.85)',
                    'thickness': 0.7,
                },
            },
        )
        fig = go.Figure(indicator)
        # A plain dict is used for the margin instead of the deprecated
        # top-level go.Margin alias.
        fig.update_layout(margin={'t': 25, 'b': 20}, height=125)
        return fig

    def _evidence_text(self):
        """Return the display text for the Q1 (evidence) label.

        Raises:
            ValueError: If the label is not one of 0-3.
        """
        q1lab = self.results['q1']['label']
        try:
            return self._EVIDENCE_TEXT[q1lab]
        except KeyError:
            raise ValueError(
                f'Unexpected Q1 label {q1lab!r}; expected an integer 0-3'
            ) from None

    def _yes_no_text(self, q):
        """Return the Yes/No display text for the binary result ``q``.

        Label 0 is rendered as "Yes" and anything else as "No".
        NOTE(review): presumably the 'q2i'/'q3i' labels are inverted relative
        to the 0-no/1-yes scale in the help text -- confirm against the model
        output.
        """
        return self._YES_TEXT if self.results[q]['label'] == 0 else self._NO_TEXT

    def draw(self):
        """Render the expander, the gauge and the three subscore metrics."""
        st = self.p
        with st.expander('About the QuAL Score and this Tool', expanded=False):
            st.markdown(about_blurb)

        st.plotly_chart(self._build_figure(), use_container_width=True)

        # Confidence progress bars are not drawn, so get_prog_setup() is
        # intentionally not called here.
        cols = st.columns(3)
        with cols[0]:
            st.metric(
                'Level of Detail', self._evidence_text(),
                help='Q1 - Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)')
        with cols[1]:
            st.metric(
                'Suggestion Given', self._yes_no_text('q2i'),
                help='Q2 - Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)')
        with cols[2]:
            st.metric(
                'Suggestion Linked', self._yes_no_text('q3i'),
                help='Q3 - Connection - Is the rater\u2019s suggestion linked to the behavior described? (0-no/1-yes)')

    def get_prog_setup(self, q):
        """Return ``(prog_score, prog_theme)`` for the result keyed by ``q``.

        ``prog_score`` is the entry of ``results[q]['scores']`` at the
        predicted label, multiplied by 100. ``prog_theme`` is a fixed theme
        dictionary; it does not depend on the score.
        """
        entry = self.results[q]
        prog_score = entry['scores'][entry['label']] * 100
        prog_theme = {'content_color': 'white', 'progress_color': '#aaa'}
        return prog_score, prog_theme