Spaces:
Sleeping
Sleeping
File size: 9,631 Bytes
74688de 1f845b3 74688de 8a4dd4f 7e0543d 74688de 8a4dd4f 74688de 8a4dd4f 74688de 8a4dd4f 8cfb467 f1f40cb 8cfb467 c42b674 8cfb467 8a4dd4f 93d78ca 74688de 93d78ca 74688de 93d78ca 74688de fc2a850 74688de 08e3335 8a4dd4f 0008647 5c949ee 0008647 8a4dd4f 0008647 8a4dd4f c2d8cf6 8a4dd4f df73138 8a4dd4f df73138 8cfb467 8a4dd4f 8cfb467 8a4dd4f 0008647 a15f9dd 8a4dd4f 0008647 74386d3 8a4dd4f 0e579ca 0008647 8a4dd4f 0008647 a3db831 650f9cf 0008647 84621ca 5c949ee f580570 8cfb467 8a4dd4f 8e14bf1 330e431 fd56584 8a4dd4f 74688de 8a4dd4f 93d78ca 330e431 8a4dd4f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 |
# coding=utf-8
# Copyright 2023 The GIRT Authors.
# Lint as: python3
# This space is built based on AMR-KELEG/ALDi and cis-lmu/GlotLID space.
# GIRT Space
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import pandas as pd
import base64
import json
@st.cache_data
def render_svg(svg):
    """Render an SVG document as a centred image on the page.

    The SVG text is base64-encoded into a ``data:`` URI and written to a
    Streamlit container as raw HTML.

    Args:
        svg: The SVG markup as a string.
    """
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # HTML attributes are separated by whitespace only; the previous markup
    # had a stray comma between ``src`` and ``width``.
    html = rf'<p align="center"> <img src="data:image/svg+xml;base64,{b64}" width="40%"/> </p>'
    c = st.container()
    c.write(html, unsafe_allow_html=True)
@st.cache_resource
def load_model(model_name):
    """Return the pretrained seq2seq model identified by ``model_name``.

    Wrapped in ``st.cache_resource`` so the model object is reused
    instead of being loaded again for the same name.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)
@st.cache_resource
def load_tokenizer(model_name):
    """Return the pretrained tokenizer identified by ``model_name``.

    Wrapped in ``st.cache_resource`` so the tokenizer object is reused
    instead of being loaded again for the same name.
    """
    return AutoTokenizer.from_pretrained(model_name)
@st.cache_resource
def load_examples():
    """Load the example presets from ``assets/examples.json``.

    Returns:
        The parsed JSON content of the examples file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open("assets/examples.json", "r", encoding="utf-8") as f:
        examples = json.load(f)
    return examples
# Load the model, the tokenizer and the example presets while showing a spinner.
_MODEL_NAME = 'nafisehNik/girt-t5-base'
with st.spinner(text="Please wait while the model is loading...."):
    model = load_model(_MODEL_NAME)
    tokenizer = load_tokenizer(_MODEL_NAME)
    examples = load_examples()
# create instruction from metadata
def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    """Build the multi-line instruction text from the template metadata.

    Each metadata field becomes one ``key: value`` line. Any falsy metadata
    value is replaced by ``<|MASK|>``; a falsy summary is replaced by
    ``<|EMPTY|>``. The summary line always comes last.
    """
    keys = ('name', 'about', 'title', 'labels', 'assignees', 'headlines_type', 'headlines')
    values = (name, about, title, labels, assignees, headline_type, headline)
    rows = [f'{key}: {value or "<|MASK|>"}' for key, value in zip(keys, values)]
    rows.append(f'summary: {summary or "<|EMPTY|>"}')
    return '\n'.join(rows)
# compute the output
def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    """Generate text for ``sample`` and strip tokenizer artefacts from it.

    The prompt is tokenized, passed to ``model.generate`` with the given
    sampling and length settings, and the first decoded sequence is
    cleaned up with a fixed, ordered list of string replacements.

    Returns:
        The post-processed generated text.
    """
    encoded = tokenizer(sample, return_tensors="pt").to('cpu')
    generation_settings = dict(
        min_length=min_length,
        max_length=max_length,
        do_sample=do_sample,
        top_p=top_p,
        top_k=top_k,
    )
    token_ids = model.generate(**encoded, **generation_settings).to('cpu')

    # Special tokens are kept while decoding and removed by hand below.
    text = tokenizer.batch_decode(token_ids, skip_special_tokens=False)[0]

    # Order matters: the longer patterns ('<pad> ', '<unk>!--') must be
    # applied before their shorter prefixes ('<pad>', '<unk>').
    cleanup_rules = (
        ('\n ', '\n'),
        ('</s>', ''),
        ('<pad> ', ''),
        ('<pad>', ''),
        # presumably restores HTML comment openers whose '<' decoded as <unk>
        ('<unk>!--', '<!--'),
        ('<unk>', ''),
    )
    for pattern, replacement in cleanup_rules:
        text = text.replace(pattern, replacement)
    return text
# Widen the expanded sidebar to a fixed 450px by injecting a CSS rule.
# The <style> element is now closed explicitly; it was left open before.
st.markdown(
    """
<style>
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 450px;
max-width: 450px;
}
</style>
""",
    unsafe_allow_html=True)
# Sidebar: collects the issue-template metadata (in_* variables) and the
# generation settings (*_in variables) used by the main page.
with st.sidebar:
    # NOTE(review): the leading glyphs in the titles below look like
    # mis-decoded emoji; confirm the file encoding before changing them.
    st.title(" π§ Settings")
    with st.expander("π Issue Template Inputs", True):
        # choose examples
        in_examples = st.selectbox(
            label='You can select one of the following examples and customize it:',
            options=('no example', 'bug report', 'feature request', 'question', 'documentation'),
            index=1)

        # Every default below is taken from the selected example preset.
        selected_example = examples[in_examples]

        in_name = st.text_input(
            label="Name Metadata: ",
            value=selected_example['name'],
            placeholder="e.g., Bug Report or Feature Request or Question",
            on_change=None)

        in_about = st.text_input(
            label="About Metadata: ",
            value=selected_example['about'],
            placeholder="e.g., File a bug report",
            on_change=None)

        # Title: the checkbox forces the field to <|EMPTY|>, otherwise a
        # text input is shown.
        empty_title_value_default = selected_example['title'] == '<|EMPTY|>'
        in_empty_title = st.checkbox(label='without title', value=empty_title_value_default)
        if not in_empty_title:
            # an example title of <|EMPTY|> must not pre-fill the text box
            title_value_default = "" if empty_title_value_default else selected_example['title']
            in_title = st.text_input(
                label="Title Metadata: ",
                value=title_value_default,
                placeholder="e.g., [Bug]: ",
                on_change=None)
        else:
            in_title = '<|EMPTY|>'

        # Labels: same pattern as the title.
        empty_labels_value_default = selected_example['labels'] == '<|EMPTY|>'
        in_empty_labels = st.checkbox(label='without labels', value=empty_labels_value_default)
        if not in_empty_labels:
            # an example value of <|EMPTY|> must not pre-fill the text box
            labels_value_default = "" if empty_labels_value_default else selected_example['labels']
            in_labels = st.text_input(
                label="Labels Metadata: ",
                value=labels_value_default,
                placeholder="e.g., feature, enhancement",
                on_change=None)
        else:
            in_labels = '<|EMPTY|>'

        # Assignees: same pattern as the title.
        empty_assignees_value_default = selected_example['assignees'] == '<|EMPTY|>'
        in_empty_assignees = st.checkbox(label='without assignees', value=empty_assignees_value_default)
        if not in_empty_assignees:
            # an example value of <|EMPTY|> must not pre-fill the text box
            assignees_value_default = "" if empty_assignees_value_default else selected_example['assignees']
            # NOTE(review): this placeholder is identical to the labels one,
            # which looks like a copy-paste; confirm the intended hint.
            in_assignees = st.text_input(
                label="Assignees Metadata: ",
                value=assignees_value_default,
                placeholder="e.g., feature, enhancement",
                on_change=None)
        else:
            in_assignees = '<|EMPTY|>'

        # Headline type: map the example's value to the selectbox option index.
        if selected_example['headlines_type'] == '<|EMPTY|>':
            headlines_type_value_default = 3
        elif selected_example['headlines_type'] == '':
            headlines_type_value_default = 2
        elif selected_example['headlines_type'] == '**Emphasis**':
            headlines_type_value_default = 1
        else:
            headlines_type_value_default = 0

        in_headline_type = st.selectbox(
            label='How would you like to be your Headlines?',
            options=('# Heading', '**Emphasis**', 'Either', 'No headlines'),
            index=headlines_type_value_default)

        if in_headline_type == 'No headlines':
            # If no headlines is selected, force the headlines to be empty
            # as well. The old code rewrote in_headline_type to '<|EMPTY|>'
            # and then tested it against 'No headlines', which was always
            # true, so the text area below overwrote this forced value.
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'
        else:
            if in_headline_type == 'Either':
                in_headline_type = '<|MASK|>'

            headlines_value_default = selected_example['headlines'] if selected_example['headlines'] != '<|EMPTY|>' else ""
            in_headlines = st.text_area(
                label="Headlines: ",
                value=headlines_value_default,
                placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info",
                on_change=None,
                height=200)

            if not in_headlines:
                in_headlines = '<|MASK|>'
            else:
                # one headline per line, surrounding whitespace removed
                in_headlines = [element.strip() for element in in_headlines.split('\n')]

        # summary
        summary_value_default = selected_example['summary'] if selected_example['summary'] != '<|EMPTY|>' else ""
        in_summary = st.text_area(
            label="Summary: ",
            value=summary_value_default,
            placeholder="This Github Issue Template is ...",
            on_change=None,
            height=200)

    with st.expander("π Model Config", False):
        # Slider arguments are (label, min, max, default).
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)
# Show the "Duplicate Space" badge. It links to this Space's URL with
# ?duplicate=true, and the badge logo is an inline base64-encoded PNG.
st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14)](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")
# Load the logo. A context manager closes the file handle promptly, and
# the explicit encoding keeps decoding independent of the platform locale.
with open("assets/logo.svg", encoding="utf-8") as logo_file:
    render_svg(logo_file.read())

st.markdown('The current demo runs on the CPU, making it slower than when the GPU is in use (each request takes 2-3 seconds on the GPU). A GPU-based demo is requested to HuggingFace team.')

# Build the prompt from the sidebar inputs and show it to the user.
prompt = create_instruction(in_name, in_about, in_title, in_labels, in_assignees, in_headline_type, in_headlines, in_summary)
st.code(prompt, language="python")

# Run generation only when the button is pressed.
clicked = st.button("Submit")
if clicked:
    with st.spinner("Please Wait..."):
        # Sampling is always enabled; top_p / top_k and the length bounds
        # come from the "Model Config" sliders.
        res = compute(prompt, top_p=top_p_in, top_k=top_k_in, do_sample=True, max_length=max_length_in, min_length=min_length_in)
        st.code(res, language="python")