import json
from typing import List, Dict, Tuple, Union, Any

import streamlit as st
from annotated_text import annotated_text
sidebar = st.sidebar


def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
    gold_set = set(belief_state_example['gold'].split(' '))
    input_set = set(" ".join(belief_state_example['input']).split(' '))
    generated = belief_state_example['generated']
    result = []
    for word in generated.split(' '):
        if word in gold_set:
            result.append((word, 'gold', '#dfd'))  # gold overlap => label green
        elif word in input_set:
            result.append((word, 'in', '#eea'))  # input overlap => label yellow
        else:
            result.append(word + ' ')  # no overlap => no label (replace space)
    return result
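# A quick sanity check on hypothetical data (not taken from the output files):
#   bs_unigram_match_annotated_text({
#       'input': ['<sos_u> i need a cheap hotel <eos_u>'],
#       'gold': '[hotel] pricerange cheap',
#       'generated': '[hotel] pricerange cheap hotel area',
#   })
#   == [('[hotel]', 'gold', '#dfd'), ('pricerange', 'gold', '#dfd'),
#       ('cheap', 'gold', '#dfd'), ('hotel', 'in', '#eea'), 'area ']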

# Model definitions: display name, description shown in the UI, and the path to
# the model's generated-output JSONL file.

models: Dict[str, Dict[str, Any]] = {
    'pptod-small': {
        'name': 'pptod-small',
        'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** As such, '
                       'it is familiar with the meaning of these special separator tokens. However, it does not have '
                       'MultiWoZ training experience, so while it has adapted to the belief state grammar generally, it '
                       'is unaware of the particular slot name conventions of MultiWoZ.',
        'output_file': './output/pptod-small-10-percent.jsonl'
    },
    't5-small': {
        'name': 't5-small',
        'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude special '
                       'tokens that the model could not be familiar with.',
        'output_file': './output/t5-small-10-percent.jsonl'
    },
    'bart': {
        'name': 'bart',
        'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude special '
                       'tokens that the model could not be familiar with.',
        'output_file': './output/bart-100ish-examples.jsonl'
    },
    'dialogpt': {
        'name': 'dialogpt',
        'description': 'DialoGPT, a dialogue response generation model (a fine-tuned GPT-2) trained for multi-turn '
                       'conversations on 147M Reddit conversation chains',
        'output_file': './output/dialogpt-100ish-examples.jsonl'
    },
    'my-t5-pptod-checkpoint': {
        'name': 'my-t5-pptod-checkpoint',
        'description': 'My re-implementation of the pptod approach, in restructured code',
        'output_file': './output/my-pre-train-t5-100ish-examples.jsonl'
    },
    'my-t5-fine-tune-checkpoint': {
        'name': 'my-t5-fine-tune-checkpoint',
        'description': 'My re-implementation of the pptod approach, in restructured code, fine-tuned on MultiWoZ',
        'output_file': './output/my-fine-tune-t5-100ish-examples.jsonl'
    }
}
# Load each model's generated examples from its JSONL output file (one record per line).
for model_def in models.values():
    model_def['examples'] = []
    with open(model_def['output_file'], 'r') as f:
        for line in f:
            model_def['examples'].append(json.loads(line.strip()))
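# Assumed shape of each JSONL record, inferred from the lookups below (the real
# files may carry extra keys): a list of turn dicts, of which only the first is
# shown, e.g.
#   [{'turn_domain': ['[hotel]'], 'turn_num': 0,
#     'bspn_input': '...', 'bspn': '...', 'bspn_gen': '...',
#     'resp_input': '...', 'resp': '...', 'resp_gen': '...'}]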


model_names = list(models.keys())


model_name = sidebar.selectbox('Model', model_names)
active_model = models[model_name]

st.write(f"""
#### Inputs

**Selected Model:** `{active_model['name']}`

{active_model['description']}

""")
"""
### Belief State Prediction

Below is the predicted belief state as a sequence. 

- `input` denotes the input, which has been transformed into a list for
human readability but is presented to the model as a sequence.
- `gold` is the target belief state in sequence form (slot-name slot-value pairs)
- `generated` is the model generated belief state sequence
"""
titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})" for i, e in enumerate(active_model['examples'])]
title = sidebar.selectbox("Development Example", titles)
# Parse the example index back out of the selected title (title[0] alone breaks past index 9).
active_example = active_model['examples'][int(title.split(':')[0])][0]

active_belief_spans = active_example['bspn_input'].split("> <")
# Re-attach the '<' / '>' characters consumed by splitting on "> <"
# (note: compare strings with `!=`, not `is not`).
active_example_bs = {
    'input': [('<' if i > 0 else '') + string +
              ('>' if string[-1] != '>' and len(active_belief_spans) > 1 else '')
              for i, string in enumerate(active_belief_spans)],
    'generated': active_example['bspn_gen'],
    'gold': active_example['bspn'],
}
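# How the split-and-rejoin behaves on a hypothetical sequence:
#   "<sos_u> i need a hotel <eos_u> <sos_b> <eos_b>".split("> <")
#   -> ['<sos_u> i need a hotel <eos_u', 'sos_b', 'eos_b>']
#   which the comprehension above restores to
#   -> ['<sos_u> i need a hotel <eos_u>', '<sos_b>', '<eos_b>']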

st.write(active_example_bs)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_bs))

"""
---

### Response Generation

Below is the predicted response as a sequence. 

- `input` denotes the input, which has been transformed into a list for
human readability but is presented to the model as a sequence.
- `gold` is the target response sequence
- `generated` is the model generated response
"""

active_example_resp = {
    'input': [('<' if i > 0 else '') + string +
              ('>' if string[-1] != '>' else '')
              for i, string in enumerate(active_example['resp_input'].split("> <"))],
    'generated': active_example['resp_gen'],
    'gold': active_example['resp'],
}

st.write(active_example_resp)
"""
##### Generated Overlap
"""
annotated_text(*bs_unigram_match_annotated_text(active_example_resp))