import streamlit as st
import transformers as tf  # NB: 'tf' aliases Hugging Face transformers here, not TensorFlow
import pandas as pd
from overview import NQDOverview

# Load a text-classification pipeline from the Hugging Face Hub,
# cached as a singleton so each model is only downloaded once
@st.experimental_singleton(show_spinner=False)
def load_model(username, prefix, model_name):
    p = tf.pipeline('text-classification', f'{username}/{prefix}-{model_name}', return_all_scores=True)
    return p
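
# Load and cache a pickled dataframe of example comments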
@st.experimental_singleton(show_spinner=False)
def load_pickle(f):
    return pd.read_pickle(f)
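
# Run a single model on a comment; return the index of the
# highest-scoring class along with the full score distribution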
def get_results(model, c):
    res = model(c)[0]
    scores = [r['score'] for r in res]
    label = max(range(len(scores)), key=lambda i: scores[i])
    return {'label': label, 'scores': scores}
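
# Run every loaded model on the same comment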
def run_models(model_names, models, c):
    results = {}
    for mn in model_names:
        results[mn] = get_results(models[mn], c)
    return results

st.title('Assess the *QuAL*ity of your feedback')
st.caption(
    """Medical education requires high-quality *written* feedback,
    but evaluating these *supervisor narrative comments* is time-consuming.
    The QuAL score has validity evidence for measuring the quality of short
    comments in this context. We developed an NLP/ML-powered tool to
    assess written comment quality via the QuAL score with high accuracy.
    *Try it for yourself!*
    """)

### Load models
# Specify which models to load
USERNAME = 'maxspad'
PREFIX = 'nlp-qual'
models_to_load = ['qual', 'q1', 'q2i', 'q3i']
n_models = float(len(models_to_load))
models = {}
# Show a progress bar while models are downloading,
# then hide it when done
lc_placeholder = st.empty()
loader_container = lc_placeholder.container()
loader_container.caption('Loading models... please wait...')
pbar = loader_container.progress(0.0)
for i, mn in enumerate(models_to_load):
    pbar.progress((i + 1.0) / n_models)
    models[mn] = load_model(USERNAME, PREFIX, mn)
lc_placeholder.empty()

### Load example data
examples = load_pickle('test.pkl')
### Process input
# Pick a random example comment and clean it up for display
ex = examples['comment'].sample(1).tolist()[0]
try:
    ex = ex.strip().replace('_x000D_', '').replace('nan', 'blank')
except AttributeError:
    # Non-string example (e.g. NaN), fall back to a placeholder
    ex = 'blank'
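
# Seed the text area with the example on first load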
if 'comment' not in st.session_state:
    st.session_state['comment'] = ex

with st.form('comment_form'):
    comment = st.text_area('Try a comment:', value=st.session_state['comment'])
    left_col, right_col = st.columns([1, 9], gap='medium')
    submitted = left_col.form_submit_button('Submit')
    trying_example = right_col.form_submit_button('Try an example!')

if submitted:
    st.session_state['button_clicked'] = 'submit'
    st.session_state['comment'] = comment
    st.experimental_rerun()
elif trying_example:
    st.session_state['button_clicked'] = 'example'
    st.session_state['comment'] = ex
    st.experimental_rerun()
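
# Score the current comment with all four models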
results = run_models(models_to_load, models, st.session_state['comment'])
# Combine sub-scores into the overall QuAL score; if Q2 found no
# suggestion, Q3 cannot have a linked connection, so force it to 1
if results['q2i']['label'] == 1:
    results['q3i']['label'] = 1  # can't have a connection without a suggestion
results['qual']['label'] = results['q1']['label'] + (not results['q2i']['label']) + (not results['q3i']['label'])

overview = NQDOverview(st, results)
overview.draw()