File size: 9,631 Bytes
74688de
 
 
 
 
 
 
 
 
 
1f845b3
74688de
8a4dd4f
7e0543d
74688de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a4dd4f
 
 
 
 
 
 
 
74688de
 
 
 
8a4dd4f
74688de
 
8a4dd4f
8cfb467
f1f40cb
8cfb467
c42b674
8cfb467
 
 
 
 
 
8a4dd4f
93d78ca
74688de
 
 
 
 
93d78ca
74688de
93d78ca
 
 
74688de
 
 
 
 
 
 
 
 
fc2a850
 
74688de
 
 
 
 
 
 
 
 
08e3335
8a4dd4f
0008647
 
 
 
 
 
 
 
 
 
 
 
5c949ee
0008647
 
 
8a4dd4f
 
 
 
 
 
 
 
 
 
 
 
0008647
8a4dd4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2d8cf6
8a4dd4f
 
 
df73138
8a4dd4f
 
 
 
 
 
 
 
 
 
 
df73138
8cfb467
 
8a4dd4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8cfb467
 
8a4dd4f
 
 
 
 
 
 
 
 
 
 
0008647
a15f9dd
8a4dd4f
 
 
 
 
 
 
 
 
 
 
0008647
74386d3
8a4dd4f
 
 
 
 
 
 
 
 
0e579ca
 
 
 
 
 
 
0008647
8a4dd4f
 
 
 
 
 
 
 
0008647
 
a3db831
650f9cf
 
 
 
0008647
84621ca
 
 
5c949ee
 
f580570
8cfb467
8a4dd4f
8e14bf1
330e431
fd56584
8a4dd4f
74688de
8a4dd4f
93d78ca
330e431
8a4dd4f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# coding=utf-8
# Copyright 2023 The GIRT Authors.
# Lint as: python3


# This space is built based on AMR-KELEG/ALDi and cis-lmu/GlotLID space.
# GIRT Space

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import pandas as pd
import base64
import json

@st.cache_data
def render_svg(svg):
    """Render an SVG string as a centered image on the page.

    The SVG markup is base64-encoded, embedded in an ``<img>`` tag through a
    ``data:`` URI and written as raw HTML into a new Streamlit container.

    Args:
        svg: The SVG document as a string.
    """
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # HTML attributes are separated by whitespace only; the original markup had
    # a stray comma between `src` and `width`.
    html = f'<p align="center"> <img src="data:image/svg+xml;base64,{b64}" width="40%"/> </p>'
    c = st.container()
    c.write(html, unsafe_allow_html=True)


@st.cache_resource
def load_model(model_name):
    """Return the pretrained seq2seq model identified by `model_name`.

    The function is wrapped in ``st.cache_resource``.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)

@st.cache_resource
def load_tokenizer(model_name):
    """Return the pretrained tokenizer identified by `model_name`.

    The function is wrapped in ``st.cache_resource``.
    """
    return AutoTokenizer.from_pretrained(model_name)

@st.cache_resource
def load_examples():
    """Read the example inputs from ``assets/examples.json``.

    Returns:
        The parsed JSON content of the file.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open("assets/examples.json", "r", encoding="utf-8") as f:
        return json.load(f)


# load resources
with st.spinner(text="Please wait while the model is loading...."):
    # The model and its tokenizer are fetched from the same checkpoint.
    checkpoint = 'nafisehNik/girt-t5-base'
    model = load_model(checkpoint)
    tokenizer = load_tokenizer(checkpoint)
    # Example inputs used to pre-fill the sidebar widgets.
    examples = load_examples()


# create instruction from metadata
def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    """Build the multi-line instruction prompt from the template metadata.

    Each metadata field is written on its own ``key: value`` line, in a fixed
    order. A falsy metadata value (empty string, ``None``, empty list, ...) is
    written as ``<|MASK|>``; a falsy `summary` is written as ``<|EMPTY|>``.

    Returns:
        The instruction string, with lines joined by a newline and no trailing
        newline.
    """
    mask_token = '<|MASK|>'
    labelled_fields = (
        ('name', name),
        ('about', about),
        ('title', title),
        ('labels', labels),
        ('assignees', assignees),
        ('headlines_type', headline_type),
        ('headlines', headline),
    )

    prompt_lines = []
    for key, value in labelled_fields:
        shown = value if value else mask_token
        prompt_lines.append(f'{key}: {shown}')

    # The summary is the only field that falls back to the EMPTY marker.
    summary_text = summary if summary else '<|EMPTY|>'
    prompt_lines.append(f'summary: {summary_text}')

    return '\n'.join(prompt_lines)

# compute the output
def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    """Generate text for the prompt `sample` and clean up the decoded output.

    Uses the module-level `tokenizer` and `model`. The prompt is tokenized to
    PyTorch tensors, passed to ``model.generate`` with the given length and
    sampling settings, and the first decoded sequence is post-processed.

    Args:
        sample: The prompt text.
        top_p, top_k, do_sample, max_length, min_length: Forwarded unchanged to
            ``model.generate``.

    Returns:
        The first generated sequence as a string with the special-token
        markers removed or rewritten.
    """
    encoded = tokenizer(sample, return_tensors="pt").to('cpu')

    generation_settings = {
        'min_length': min_length,
        'max_length': max_length,
        'do_sample': do_sample,
        'top_p': top_p,
        'top_k': top_k,
    }
    token_ids = model.generate(**encoded, **generation_settings).to('cpu')

    # Special tokens are kept while decoding and handled by the rules below.
    text = tokenizer.batch_decode(token_ids, skip_special_tokens=False)[0]

    # Applied strictly in this order: the longer patterns ('<pad> ',
    # '<unk>!--') must run before their shorter prefixes.
    cleanup_rules = (
        ('\n ', '\n'),
        ('</s>', ''),
        ('<pad> ', ''),
        ('<pad>', ''),
        ('<unk>!--', '<!--'),
        ('<unk>', ''),
    )
    for pattern, substitute in cleanup_rules:
        text = text.replace(pattern, substitute)

    return text


# expand sidebar
# Pin the expanded sidebar to a fixed width of 450px. The <style> element is
# closed explicitly so the injected HTML is well-formed.
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"]{
        min-width: 450px;
        max-width: 450px;
    }
    </style>
    """,
    unsafe_allow_html=True)


def _example_text(example, field):
    """Return `example[field]`, or "" when it holds the '<|EMPTY|>' marker."""
    value = example[field]
    return value if value != '<|EMPTY|>' else ""


def _optional_metadata_input(example, field, checkbox_label, input_label, placeholder):
    """Draw a 'without ...' checkbox and, unless it is ticked, a text input.

    The checkbox is pre-ticked when the example stores '<|EMPTY|>' for `field`.

    Returns:
        '<|EMPTY|>' when the checkbox is ticked, otherwise the text input value.
    """
    omitted = st.checkbox(label=checkbox_label, value=example[field] == '<|EMPTY|>')
    if omitted:
        return '<|EMPTY|>'
    return st.text_input(
        label=input_label,
        value=_example_text(example, field),
        placeholder=placeholder,
        on_change=None)


# Sidebar: collects the issue-template metadata (in_*) and the generation
# settings (*_in) that the main page reads afterwards.
with st.sidebar:

    st.title(" 🔧 Settings")

    with st.expander("🏗 Issue Template Inputs", True):

        # choose examples
        in_examples = st.selectbox(
            label='You can select one of the following examples and customize it:',
            options=('no example', 'bug report', 'feature request', 'question', 'documentation'),
            index=1)
        # every widget below is pre-filled from the selected example
        selected_example = examples[in_examples]

        in_name = st.text_input(
            label="Name Metadata: ",
            value=selected_example['name'],
            placeholder="e.g., Bug Report or Feature Request or Question",
            on_change=None)

        in_about = st.text_input(
            label="About Metadata: ",
            value=selected_example['about'],
            placeholder="e.g., File a bug report",
            on_change=None)

        # Title
        in_title = _optional_metadata_input(
            selected_example, 'title',
            checkbox_label='without title',
            input_label="Title Metadata: ",
            placeholder="e.g., [Bug]: ")

        # Labels
        in_labels = _optional_metadata_input(
            selected_example, 'labels',
            checkbox_label='without labels',
            input_label="Labels Metadata: ",
            placeholder="e.g., feature, enhancement")

        # Assignees
        in_assignees = _optional_metadata_input(
            selected_example, 'assignees',
            checkbox_label='without assignees',
            input_label="Assignees Metadata: ",
            placeholder="e.g., octocat, hubot")

        # headline type: preselect the option matching the example
        example_headline_type = selected_example['headlines_type']
        if example_headline_type == '<|EMPTY|>':
            headlines_type_value_default = 3
        elif example_headline_type == '':
            headlines_type_value_default = 2
        elif example_headline_type == '**Emphasis**':
            headlines_type_value_default = 1
        else:
            headlines_type_value_default = 0

        in_headline_type = st.selectbox(
            label='How would you like to be your Headlines?',
            options=('# Heading', '**Emphasis**', 'Either', 'No headlines'),
            index=headlines_type_value_default)

        if in_headline_type == 'No headlines':
            # If no headlines is selected, force the headlines to be empty as
            # well and do not draw the headlines box. The selection must be
            # tested before it is replaced by its marker; testing afterwards
            # would always draw the box and overwrite in_headlines.
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'
        else:
            if in_headline_type == 'Either':
                in_headline_type = '<|MASK|>'

            in_headlines = st.text_area(
                label="Headlines: ",
                value=_example_text(selected_example, 'headlines'),
                placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info",
                on_change=None,
                height=200)

            if not in_headlines:
                in_headlines = '<|MASK|>'
            else:
                # one headline per line, surrounding whitespace removed
                in_headlines = [element.strip() for element in in_headlines.split('\n')]

        # summary
        in_summary = st.text_area(
            label="Summary: ",
            value=_example_text(selected_example, 'summary'),
            placeholder="This Github Issue Template is ...",
            on_change=None,
            height=200)

    with st.expander("🎛 Model Config", False):
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)

    # load deduplicate icon
    st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14)](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")

# load logo
# Read the logo through a context manager so the file handle is closed right
# away; UTF-8 matches the encoding render_svg uses when embedding the markup.
with open("assets/logo.svg", encoding="utf-8") as logo_file:
    render_svg(logo_file.read())
st.markdown('The current demo runs on the CPU, making it slower than when the GPU is in use (each request takes 2-3 seconds on the GPU). A GPU-based demo is requested to HuggingFace team.')

# Build the prompt from the sidebar inputs and show it before submission.
prompt = create_instruction(in_name, in_about, in_title, in_labels, in_assignees, in_headline_type, in_headlines, in_summary)

st.code(prompt, language="python")

clicked = st.button("Submit")

# Generation only runs when the button is pressed; sampling is always enabled.
if clicked:
    with st.spinner("Please Wait..."):
        res = compute(prompt, top_p = top_p_in, top_k=top_k_in, do_sample=True, max_length=max_length_in, min_length=min_length_in)
        st.code(res, language="python")