Spaces:

nafisehNik
/

girt-space

Sleeping

File size: 9,631 Bytes

# coding=utf-8
# Copyright 2023 The GIRT Authors.
# Lint as: python3


# This space is based on the AMR-KELEG/ALDi and cis-lmu/GlotLID spaces.
# GIRT Space

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import pandas as pd
import base64
import json

@st.cache_data
def render_svg(svg):
    """Render the given SVG string as a centered inline image.

    The SVG text is base64-encoded into a ``data:`` URI, wrapped in an
    ``<img>`` tag and written as raw HTML into a new Streamlit container.

    Args:
        svg: The SVG document as a string.
    """
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # HTML attributes are separated by whitespace only (no comma between src and width).
    html = rf'<p align="center"> <img src="data:image/svg+xml;base64,{b64}" width="40%"/> </p>'
    c = st.container()
    c.write(html, unsafe_allow_html=True)


@st.cache_resource
def load_model(model_name):
    """Return the seq2seq model loaded via ``AutoModelForSeq2SeqLM.from_pretrained``.

    Args:
        model_name: Identifier passed straight to ``from_pretrained``.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)

@st.cache_resource
def load_tokenizer(model_name):
    """Return the tokenizer loaded via ``AutoTokenizer.from_pretrained``.

    Args:
        model_name: Identifier passed straight to ``from_pretrained``.
    """
    return AutoTokenizer.from_pretrained(model_name)

@st.cache_resource
def load_examples():
    """Read the example presets from ``assets/examples.json``.

    Returns:
        The parsed JSON content of the file.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open("assets/examples.json", "r", encoding="utf-8") as f:
        return json.load(f)


# load resources
# The model and its tokenizer are both loaded from the same checkpoint name.
_checkpoint = 'nafisehNik/girt-t5-base'
with st.spinner(text="Please wait while the model is loading...."):
    model = load_model(_checkpoint)
    tokenizer = load_tokenizer(_checkpoint)
    examples = load_examples()


# create instruction from metadata
def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    """Build the multi-line instruction prompt from the template metadata.

    Every falsy metadata value is replaced by ``'<|MASK|>'``; a falsy
    ``summary`` is replaced by ``'<|EMPTY|>'``. Values are interpolated with
    their default string formatting, one ``key: value`` pair per line.

    Returns:
        The instruction string, lines joined by ``'\\n'``.
    """
    keys = ('name', 'about', 'title', 'labels', 'assignees', 'headlines_type', 'headlines')
    values = (name, about, title, labels, assignees, headline_type, headline)

    rows = [f'{key}: {value or "<|MASK|>"}' for key, value in zip(keys, values)]
    rows.append(f'summary: {summary or "<|EMPTY|>"}')
    return '\n'.join(rows)

# compute the output
def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    """Generate text for ``sample`` and clean up the decoded output.

    Uses the module-level ``tokenizer`` and ``model``. The first generated
    sequence is decoded with special tokens kept, after which a fixed list of
    literal substitutions is applied to it.

    Args:
        sample: The instruction prompt to tokenize.
        top_p, top_k, do_sample, max_length, min_length: Forwarded unchanged
            to ``model.generate``.

    Returns:
        The post-processed text of the first generated sequence.
    """
    encoded = tokenizer(sample, return_tensors="pt").to('cpu')

    generation_options = {
        'min_length': min_length,
        'max_length': max_length,
        'do_sample': do_sample,
        'top_p': top_p,
        'top_k': top_k,
    }
    token_ids = model.generate(**encoded, **generation_options).to('cpu')

    text = tokenizer.batch_decode(token_ids, skip_special_tokens=False)[0]

    # Applied in order: the longer patterns ('<pad> ', '<unk>!--') must run
    # before their shorter prefixes ('<pad>', '<unk>').
    substitutions = (
        ('\n ', '\n'),
        ('</s>', ''),
        ('<pad> ', ''),
        ('<pad>', ''),
        ('<unk>!--', '<!--'),
        ('<unk>', ''),
    )
    for pattern, replacement in substitutions:
        text = text.replace(pattern, replacement)

    return text


# expand sidebar: pin the width of the expanded sidebar to 450px
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"]{
        min-width: 450px;
        max-width: 450px;
    }
    </style>
    """,
    unsafe_allow_html=True)


def _optional_metadata_input(example, field, checkbox_label, input_label, placeholder):
    """Render a "without ..." checkbox plus a text input for one optional field.

    Args:
        example: The selected example preset (mapping of field name to value).
        field: Key of the metadata field inside ``example``.
        checkbox_label: Label of the checkbox that marks the field as empty.
        input_label: Label of the text input shown while the checkbox is unticked.
        placeholder: Placeholder text of the text input.

    Returns:
        ``'<|EMPTY|>'`` when the checkbox is ticked, otherwise the value of
        the text input.
    """
    preset_is_empty = example[field] == '<|EMPTY|>'
    if st.checkbox(label=checkbox_label, value=preset_is_empty):
        return '<|EMPTY|>'
    return st.text_input(
        label=input_label,
        # an '<|EMPTY|>' preset must not show up as literal text in the input
        value="" if preset_is_empty else example[field],
        placeholder=placeholder,
        on_change=None)


with st.sidebar:

    st.title(" 🔧 Settings")

    with st.expander("🏗 Issue Template Inputs", True):

        # choose examples
        in_examples = st.selectbox(
            label='You can select one of the following examples and customize it:',
            options=('no example', 'bug report', 'feature request', 'question', 'documentation'),
            index=1)
        # preset that provides the default value of every widget below
        example = examples[in_examples]

        in_name = st.text_input(
            label="Name Metadata: ",
            value=example['name'],
            placeholder="e.g., Bug Report or Feature Request or Question",
            on_change=None)

        in_about = st.text_input(
            label="About Metadata: ",
            value=example['about'],
            placeholder="e.g., File a bug report",
            on_change=None)

        # Title
        in_title = _optional_metadata_input(
            example, 'title', 'without title', "Title Metadata: ", "e.g., [Bug]: ")

        # Labels
        in_labels = _optional_metadata_input(
            example, 'labels', 'without labels', "Labels Metadata: ", "e.g., feature, enhancement")

        # Assignees
        in_assignees = _optional_metadata_input(
            example, 'assignees', 'without assignees', "Assignees Metadata: ", "e.g., username1, username2")

        # headline type: map the preset value onto its position in the selectbox
        if example['headlines_type'] == '<|EMPTY|>':
            headlines_type_value_default = 3
        elif example['headlines_type'] == '':
            headlines_type_value_default = 2
        elif example['headlines_type'] == '**Emphasis**':
            headlines_type_value_default = 1
        else:
            headlines_type_value_default = 0

        in_headline_type = st.selectbox(
            label='How would you like to be your Headlines?',
            options=('# Heading', '**Emphasis**', 'Either', 'No headlines'),
            index=headlines_type_value_default)

        if in_headline_type == 'No headlines':
            # if no headlines is selected, force the headlines to be empty as well
            # (the headlines text area is not rendered in this case).
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'
        else:
            if in_headline_type == 'Either':
                in_headline_type = '<|MASK|>'

            headlines_value_default = example['headlines'] if example['headlines'] != '<|EMPTY|>' else ""

            in_headlines = st.text_area(
                label="Headlines: ",
                value=headlines_value_default,
                placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info",
                on_change=None,
                height=200)

            # one headline per line; blank lines do not carry a headline
            in_headlines = [line.strip() for line in in_headlines.split('\n') if line.strip()]
            if not in_headlines:
                in_headlines = '<|MASK|>'

        # summary
        summary_value_default = example['summary'] if example['summary'] != '<|EMPTY|>' else ""
        in_summary = st.text_area(
            label="Summary: ",
            value=summary_value_default,
            placeholder="This Github Issue Template is ...",
            on_change=None,
            height=200)

    with st.expander("🎛 Model Config", False):
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)

    # "Duplicate Space" badge linking to the duplicate action of this space
    st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14)](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")

# load logo
# Read the file inside a context manager so the handle is closed right after use.
with open("assets/logo.svg", encoding="utf-8") as logo_file:
    render_svg(logo_file.read())
st.markdown('The current demo runs on the CPU, making it slower than when the GPU is in use (each request takes 2-3 seconds on the GPU). A GPU-based demo is requested to HuggingFace team.')

# Build the instruction from the sidebar inputs and show it to the user.
prompt = create_instruction(in_name, in_about, in_title, in_labels, in_assignees, in_headline_type, in_headlines, in_summary)

st.code(prompt, language="python")

clicked = st.button("Submit")

# Generation only runs on the rerun triggered by pressing the button.
if clicked:
    with st.spinner("Please Wait..."):
        res = compute(prompt, top_p = top_p_in, top_k=top_k_in, do_sample=True, max_length=max_length_in, min_length=min_length_in)
        st.code(res, language="python")