Spaces:
Runtime error
Runtime error
File size: 5,292 Bytes
ce78cc4 8caa5ee a859ad1 ce78cc4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import json
from typing import List, Dict, Tuple, Union, Any
import streamlit as st
from annotated_text import annotated_text
# Shorthand for the Streamlit sidebar container; all input widgets live there.
sidebar = st.sidebar
def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
    """Tag each generated unigram by its overlap with the gold and input text.

    Produces a list consumable by `annotated_text`: tokens present in the gold
    sequence become green ('gold', '#dfd') tuples, tokens present only in the
    input become yellow ('in', '#eea') tuples, and unmatched tokens stay plain
    strings with a trailing space (the space lost to `split` is restored so
    rendered words do not run together).
    """
    gold_words = set(belief_state_example['gold'].split(' '))
    input_words = set(" ".join(belief_state_example['input']).split(' '))

    def tag(token: str) -> Union[str, Tuple]:
        if token in gold_words:
            return (token, 'gold', '#dfd')   # overlap with gold => green label
        if token in input_words:
            return (token, 'in', '#eea')     # overlap with input => yellow label
        return token + ' '                   # no overlap => unlabeled text

    return [tag(token) for token in belief_state_example['generated'].split(' ')]
# load in data
# Placeholder example store (kept for compatibility; populated elsewhere if needed).
pptod_examples: List[Dict] = []
# Registry of evaluated models: UI display name, markdown description shown in
# the app, and the JSONL file holding decoded outputs.
# NOTE: adjacent string literals concatenate, so every wrapped description
# segment must end with a trailing space or words run together when rendered.
models: Dict[str, Dict[str, Any]] = {
    'pptod-small': {
        'name': 'pptod-small',
        'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** As such, '
                       'it is familiar with the meaning of these special separator tokens. However, it does not have '
                       'MultiWoZ training experience, so while it has adapted to the belief state grammar generally, it '
                       'is unaware of the particular slot name conventions of MultiWoZ.',
        'output_file': './output/pptod-small-10-percent.jsonl'
    },
    't5-small': {
        'name': 't5-small',
        'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude special tokens '
                       'that the model could not be familiar with.',
        'output_file': './output/t5-small-10-percent.jsonl'
    },
    'bart': {
        'name': 'bart',
        'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude special tokens '
                       'that the model could not be familiar with.',
        'output_file': './output/bart-100ish-examples.jsonl'
    },
    'dialogpt': {
        'name': 'dialogpt',
        'description': 'DialoGPT is a (fine-tuned GPT-2) dialogue response generation model for multiturn conversations from 147M Reddit Conversation chains',
        'output_file': './output/dialogpt-100ish-examples.jsonl'
    },
    'my-t5-pptod-checkpoint': {
        'name': 'my-t5-pptod-checkpoint',
        'description': 'My re-implementation of the pptod approach, in re-structured code',
        'output_file': './output/my-pre-train-t5-100ish-examples.jsonl'
    },
    'my-t5-fine-tune-checkpoint': {
        'name': 'my-t5-fine-tune-checkpoint',
        'description': 'My re-implementation of the pptod approach, in re-structured code, fine-tuned on MultiWoZ',
        'output_file': './output/my-fine-tune-t5-100ish-examples.jsonl'
    }
}
# Load each model's decoded outputs from its JSONL file (one JSON object per
# line) into model_def['examples'].
for model_def in models.values():
    model_def['examples'] = []
    with open(model_def['output_file'], 'r') as f:
        # Iterate the file lazily instead of materializing readlines();
        # json.loads already ignores surrounding whitespace, so no strip needed.
        for line in f:
            model_def['examples'].append(json.loads(line))
# Sidebar model picker: the selectbox returns the chosen key into `models`.
model_names = list(models.keys())
model_name = sidebar.selectbox('Model', model_names)
active_model = models[model_name]
# Render the page header plus the selected model's markdown description.
st.write(f"""
#### Inputs
**Selected Model:** `{active_model['name']}`
{active_model['description']}
""")
"""
### Belief State Prediction
Below is the predicted belief state as a sequence.
- `input` denotes the input, which has been transformed into a list for
human readability but is presented to the model as a sequence.
- `gold` is the target belief state in sequence form (slot-name slot-value pairs)
- `generated` is the model generated belief state sequence
"""
# Example picker: each title encodes the example's index before the colon.
titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})"
          for i, e in enumerate(active_model['examples'])]
title = sidebar.selectbox("Development Example", titles)
# Parse the whole index prefix; the old `int(title[0])` took only the first
# character and selected the wrong example once there were 10+ examples.
active_example = active_model['examples'][int(title.split(':', 1)[0])][0]
# Splitting on "> <" strips the bracket from each inner boundary; rebuild each
# span so it reads '<...>' again for human readability.
active_belief_spans = active_example['bspn_input'].split("> <")
active_example_bs = {
    'input': [
        ('<' if i > 0 else '')
        + span
        # endswith replaces the old `span[-1] is not '>'`, which compared
        # string identity (implementation-dependent, SyntaxWarning on 3.8+)
        # and raised IndexError on an empty span.
        + ('>' if not span.endswith('>') and len(active_belief_spans) > 1 else '')
        for i, span in enumerate(active_belief_spans)
    ],
    'generated': active_example['bspn_gen'],
    'gold': active_example['bspn'],
}
st.write(active_example_bs)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_bs))
"""
---
### Response Generation
Below is the predicted response as a sequence.
- `input` denotes the input, which has been transformed into a list for
human readability but is presented to the model as a sequence.
- `gold` is the target response sequence
- `generated` is the model generated response
"""
# Re-bracket the response input spans: splitting on "> <" strips the bracket
# from each inner boundary, so restore it per span.
active_example_resp = {
    'input': [
        ('<' if i > 0 else '')
        + span
        # endswith replaces `span[-1] is not '>'`: identity comparison of
        # strings is implementation-dependent (SyntaxWarning on 3.8+) and
        # raised IndexError on an empty span.
        + ('>' if not span.endswith('>') else '')
        for i, span in enumerate(active_example['resp_input'].split("> <"))
    ],
    'generated': active_example['resp_gen'],
    'gold': active_example['resp'],
}
st.write(active_example_resp)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_resp))
|