# Chris-lab / pages /summarization_playground.py
# kz209
# update
# 51259f2
from dotenv import load_dotenv
import gradio as gr
import random
from utils.model import Model
from utils.data import dataset
import gc
import torch
import logging
load_dotenv()
# Stylesheet handed to gr.Blocks(css=...) when the interface is built.
# The `.custom-button` rule previously lacked the `;` after `!important`, which
# merged the background and text-colour declarations into one invalid
# declaration, so neither was applied.
# NOTE(review): `font-weight: maroon` in `.title` is not a valid font-weight
# (maroon is a colour) and `input-label` has no leading `.`; both are kept
# unchanged because the intended styling cannot be determined from this file.
custom_css = """
gradio-app {
background: #eeeefc !important;
}
.bordered-text {
border-style: solid;
border-width: 1px;
padding: 5px;
margin-bottom: 0px;
border-radius: 1px;
font-family: Verdana;
font-size: 20px !important;
font-weight: bold ;
color:#000000;
}
.parameter-text {
border-style: solid;
border-width: 1px;
padding: 5px;
margin-bottom: 0px;
border-radius: 1px;
font-family: Verdana;
font-size: 10px !important;
font-weight: bold ;
color:#000000;
}
.title {
font-size: 35px;
font-weight: maroon;
font-family: Helvetica;
}
input-label {
font-size: 20px;
font-weight: bold;
font-family: Papyrus;
}
.custom-button {
background-color: white !important; /* White background */
color: black; /* Black text */
border: none; /* Remove border */
padding: 10px 20px; /* Add padding */
text-align: center; /* Center text */
display: inline-block; /* Inline block */
font-size: 22px; /* Font size */
margin: 4px 2px; /* Margin */
cursor: pointer; /* Pointer cursor on hover */
border-radius: 4px; /* Rounded corners */
}
.custom-button:hover {
background-color: black;
color: white;
}
"""
# Label of the dropdown entry that asks for a random dataset sample.
random_label = '🔀 Random dialogue from dataset'

# Name of the model whose instance is currently stored in `model`;
# the empty string means nothing has been loaded yet.
__model_on_gpu__ = ''

# One slot per known model name, all empty (None) until a model is requested.
model = dict.fromkeys(Model.__model_list__)
# Choices offered by the "Choose an example" dropdown, mapped to the text that
# is placed in the input box. The `random_label` entry maps to an empty string
# because update_input() handles that key separately by sampling the dataset.
examples = {
    "example 1": """Boston's injury reporting for Kristaps Porziņģis has been fairly coy. He missed Game 3, but his coach told reporters just before Game 4 that was technically available, but with a catch.
Joe Mazzulla said Porziņģis would "only be used in specific instances, if necessary." That sounds like the team doesn't want to risk further injury to his dislocated Posterior Tibialis (or some other body part, due to overcompensation for the ankle), unless it's in a desperate situation.
Being up 3-1, with Game 5 at home, doesn't qualify as desperate. So, expect the Celtics to continue slow-playing KP's return.
It'd obviously be nice for Boston to have his rim protection and jump shooting back. It was missed in the Game 4 blowout, but the Celtics have also demonstrated they can win without the big man throughout this campaign.
On top of winning Game 3 of this series, Boston is plus-10.9 points per 100 possessions when Porziņģis has been off the floor this regular and postseason.""",
    "example 2": """Prior to the Finals, we predicted that Dereck Lively II's minutes would swell over the course of the series, and that's starting to play out.
He averaged 18.8 minutes in Games 1 and 2 and was up to 26.2 in Games 3 and 4. That's with the regulars being pulled long before the final buzzer in Friday's game, too.
Expect the rookie's playing time to continue to climb in Game 5. It seems increasingly clear that coach Jason Kidd trusts him over the rest of Dallas' bigs, and it's not hard to see why.
Lively has been absolutely relentless on the offensive glass all postseason. He makes solid decisions as a passer when his rolls don't immediately lead to dunks. And he's not a liability when caught defending guards or wings outside.
All of that has led to postseason averages of 8.2 points, 7.6 rebounds, 1.4 assists and 1.0 blocks in just 21.9 minutes, as well as a double-double in 22 minutes of Game 4.
Back in Boston, Kidd is going to rely on Lively even more. He'll play close to 30 minutes and reach double-figures in both scoring and rebounding again.""",
    random_label: ""
}
def model_device_check(model_name):
    """Ensure ``model_name`` is the model instance held in the ``model`` cache.

    When a different model is currently recorded in ``__model_on_gpu__``, its
    entry is removed from ``model`` and ``gc.collect()`` /
    ``torch.cuda.empty_cache()`` are run before the requested model is
    constructed with ``Model(model_name)``. Nothing happens when the requested
    model is already the recorded one.

    Args:
        model_name: Key into ``model``; also passed to the ``Model`` constructor.

    Raises:
        Whatever ``Model(model_name)`` raises. In that case the bookkeeping is
        left in the "nothing loaded" state so a later call can retry cleanly.
    """
    global __model_on_gpu__

    if __model_on_gpu__ == model_name:
        return

    if __model_on_gpu__:
        logging.info("delete model %s", __model_on_gpu__)
        # pop() with a default tolerates an entry that is already gone, where
        # `del` would raise KeyError.
        model.pop(__model_on_gpu__, None)
        # Clear the marker before loading: if the constructor below fails, the
        # marker must not keep naming a model that is no longer in the cache.
        __model_on_gpu__ = ''
        gc.collect()
        torch.cuda.empty_cache()

    model[model_name] = Model(model_name)
    __model_on_gpu__ = model_name
def get_model_batch_generation(model_name):
    """Return the cached model object for ``model_name``, loading it first if needed."""
    model_device_check(model_name)
    loaded = model[model_name]
    return loaded
def generate_answer(sources, model_name, prompt, temperature=0.0001, max_new_tokens=500, do_sample=True):
    """Generate a summary of ``sources`` with the selected model.

    The text sent to the model is the prompt, a newline, the sources wrapped in
    curly braces, a blank line and the literal ``summary:`` cue.

    Args:
        sources: Text to summarize.
        model_name: Key of the model to use; it is loaded on demand.
        prompt: Instruction text placed before the sources.
        temperature, max_new_tokens, do_sample: Forwarded positionally to the
            model's ``gen`` method.

    Returns:
        The first item returned by ``gen``, stripped of surrounding whitespace.
    """
    model_device_check(model_name)
    request_text = ''.join((prompt, '\n{', sources, '}\n\nsummary:'))
    generations = model[model_name].gen(request_text, temperature, max_new_tokens, do_sample)
    return generations[0].strip()
def process_input(input_text, model_selection, prompt, temperature=0.0001, max_new_tokens=500, do_sample=True):
    """Handle a submit click: summarize ``input_text`` and format the result.

    Returns:
        A Markdown string showing the original dialogue followed by the
        generated summary, or a hint message when ``input_text`` is empty.
    """
    # Guard clause: nothing to summarize.
    if not input_text:
        return "Please fill the input to generate outputs."

    logging.info("Start generation")
    summary = generate_answer(
        input_text,
        model_selection,
        prompt,
        temperature,
        max_new_tokens,
        do_sample,
    )
    return f"## Original Dialogue:\n\n{input_text}\n\n## Summarization:\n\n{summary}"
def update_input(example):
    """Return the input-box text for the chosen dropdown entry.

    A fixed example is looked up in ``examples``; the ``random_label`` entry
    instead draws a random item from ``dataset`` and joins its section text and
    dialogue.
    """
    if example != random_label:
        return examples[example]

    sample = random.choice(dataset)
    return '\n\nDialogue:\n'.join((sample['section_text'], sample['dialogue']))
def create_summarization_interface():
    """Build the Gradio Blocks UI for the summarization playground.

    The page contains, top to bottom: a heading, a row with the example and
    model dropdowns, the prompt template box, the dialogue input box, a submit
    button, and a row holding the generation parameters next to the output.

    Changing the example dropdown refreshes the dialogue box via
    ``update_input``; clicking submit runs ``process_input`` and renders its
    Markdown result.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a copy of this file that had lost its indentation;
    # confirm against the rendered page.
    with gr.Blocks(theme=gr.themes.Soft(spacing_size="sm", text_size="sm"), css=custom_css) as demo:
        gr.Markdown("## This is a playground to test prompts for clinical dialogue summarizations")

        with gr.Row():
            example_dropdown = gr.Dropdown(choices=list(examples.keys()), label="Choose an example", value=random_label)
            model_dropdown = gr.Dropdown(choices=Model.__model_list__, label="Choose a model", value=Model.__model_list__[0])

        # The stray "</center>" closing tag (no matching opener) was removed
        # from this header and from the "Parameters" header below.
        gr.Markdown("<div style='border: 4px solid white; padding: 3px; border-radius: 5px;width:100px;padding-top: 0.5px;padding-bottom: 10px;'><h3>Prompt 👥</h3></div>")
        template_text = gr.Textbox(value="""Summarize the following dialogue""", label='Input Prompting Template', lines=4, placeholder='Input your prompts')

        # The dropdown starts on the random entry, so seed the input box the
        # same way a dropdown change to that entry would.
        input_text = gr.Textbox(label="Input Dialogue", lines=7, placeholder="Enter text here...", value=update_input(random_label))
        submit_button = gr.Button("✨ Submit ✨")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("<div style='border: 4px solid white; padding: 2px; border-radius: 5px;width:130px;padding-bottom: 10px;'><b><h3>Parameters 📈</h3></b></div>")
                with gr.Column():
                    temperature = gr.Number(label="Temperature", elem_classes="parameter-text", value=0.0001, minimum=0.000001, maximum=1.0)
                    max_new_tokens = gr.Number(label="Max New Tokens", elem_classes="parameter-text", value=500, precision=0, minimum=0, maximum=500)
                    do_sample = gr.Dropdown([True, False], label="Do Sample", elem_classes="parameter-text", value=True)
            with gr.Column(scale=3):
                output = gr.Markdown(line_breaks=True)

        example_dropdown.change(update_input, inputs=[example_dropdown], outputs=[input_text])
        submit_button.click(
            process_input,
            inputs=[input_text, model_dropdown, template_text, temperature, max_new_tokens, do_sample],
            outputs=[output],
        )

    return demo
# Build and launch the playground only when this file is run as a script.
if __name__ == "__main__":
    demo = create_summarization_interface()
    demo.launch()