Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline | |
import pandas as pd | |
import json | |
import nltk | |
from sentence_transformers import SentenceTransformer, util | |
import numpy as np | |
from LexRank import * | |
from text import * | |
# Sentence-tokenizer data needed by nltk.sent_tokenize. Newer NLTK releases
# look the tokenizer up under 'punkt_tab' rather than 'punkt', so both are
# fetched; nltk.download reports (does not raise on) an unavailable resource.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)
# SentenceTransformer instances keyed by model name, so repeated requests
# reuse the already-loaded weights instead of reloading them on every call.
_SENTENCE_MODELS = {}


def _get_sentence_model(model_name):
    """Return the SentenceTransformer for *model_name*, loading it only once."""
    if model_name not in _SENTENCE_MODELS:
        _SENTENCE_MODELS[model_name] = SentenceTransformer(model_name)
    return _SENTENCE_MODELS[model_name]


def lex_rank(in_text, threshold=None, ex_sent=4,
             model_in='KBLab/sentence-bert-swedish-cased', language='swedish'):
    """Build an extractive summary from the highest-ranked sentences.

    The text is split into sentences, each sentence is embedded, and the
    pairwise cosine-similarity matrix is handed to
    ``degree_centrality_scores`` (not defined in this block). Sentences are
    returned in descending score order, not in document order.

    Args:
        in_text: Text to summarize.
        threshold: Forwarded to ``degree_centrality_scores``. ``None``, the
            string ``'None'`` (as sent by the UI dropdown) or an empty string
            mean "no threshold"; any other string is converted to ``float``.
        ex_sent: Maximum number of sentences to return.
        model_in: Sentence-embedding model name.
        language: Language passed to ``nltk.sent_tokenize``.

    Returns:
        The selected sentences joined by single spaces, or ``''`` when the
        input is empty or contains no sentences.

    Raises:
        ValueError: If ``threshold`` is a string that is neither
            ``'None'``/empty nor a valid number.
    """
    # Nothing to rank; return early instead of encoding an empty batch.
    if not in_text or not in_text.strip():
        return ''

    # UI widgets may deliver the threshold as text.
    if isinstance(threshold, str):
        cleaned = threshold.strip()
        threshold = None if cleaned.lower() in ('', 'none') else float(cleaned)

    # Split the document into sentences
    sentences = nltk.sent_tokenize(in_text, language=language)
    if not sentences:
        return ''

    # Compute the sentence embeddings and their pairwise cosine similarities
    model = _get_sentence_model(model_in)
    embeddings = model.encode(sentences, convert_to_tensor=True)
    cos_scores = util.cos_sim(embeddings, embeddings).cpu().numpy()

    # Compute the centrality for each sentence; highest score first
    centrality_scores = degree_centrality_scores(cos_scores, threshold=threshold)
    most_central_sentence_indices = np.argsort(-centrality_scores)

    # int() guards against a slider delivering e.g. 4.0, which cannot slice.
    top_indices = most_central_sentence_indices[:int(ex_sent)]
    return ' '.join(sentences[idx] for idx in top_indices)
# Summarization pipelines keyed by model name, so each model is loaded once
# instead of being rebuilt on every request.
_SUMMARIZATION_PIPELINES = {}


def _get_summarizer(model_name):
    """Return the summarization pipeline for *model_name*, building it only once."""
    if model_name not in _SUMMARIZATION_PIPELINES:
        _SUMMARIZATION_PIPELINES[model_name] = pipeline("summarization", model=model_name)
    return _SUMMARIZATION_PIPELINES[model_name]


def generate(in_text, num_beams, min_len, max_len, model_in):
    """Produce an abstractive summary of *in_text* with a summarization pipeline.

    Args:
        in_text: Text to summarize.
        num_beams: Beam-search width passed to the pipeline.
        min_len: Passed to the pipeline as ``min_length``.
        max_len: Passed to the pipeline as ``max_length``.
        model_in: Model name used to build the pipeline.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``""`` when the
        input is empty or whitespace-only.
    """
    print(in_text)

    # Nothing to summarize; avoid loading a model or running generation.
    if not in_text or not in_text.strip():
        return ""

    pipe = _get_summarizer(model_in)
    answer = pipe(
        in_text,
        # Sliders may deliver numbers as floats; generation expects integers.
        num_beams=int(num_beams),
        min_length=int(min_len),
        max_length=int(max_len),
        # Cut over-long inputs down to the model's maximum input length
        # instead of failing inside the model.
        truncation=True,
    )
    print(answer)
    return answer[0]["summary_text"]
def update_history(df, in_text, gen_text, model_in, sum_typ, parameters):
    """Return the history table with one summarization run appended.

    Args:
        df: Existing history as a DataFrame, or ``None`` to start a new one.
        in_text: The text that was summarized.
        gen_text: The generated summary.
        model_in: Name of the model that produced the summary ("Gen_model").
        sum_typ: Kind of summarization that was run ("Sum_type").
        parameters: JSON-serializable settings of the run; stored as a JSON
            string in the "Parameters" column.

    Returns:
        A new DataFrame holding the previous rows followed by the new row,
        with a fresh 0..n-1 index.
    """
    new_row = pd.DataFrame([{
        "In_text": in_text,
        "Gen_text": gen_text,
        "Sum_type": sum_typ,
        "Gen_model": model_in,
        "Parameters": json.dumps(parameters),
    }])
    frames = [new_row] if df is None else [df, new_row]
    # ignore_index: without it every appended row keeps index label 0, so
    # the history would end up with duplicate index labels.
    return pd.concat(frames, ignore_index=True)
def generate_transformer(in_text, num_beams, min_len, max_len, model_in, history):
    """Run abstractive summarization and record the run in the history table.

    Args:
        in_text: Text to summarize.
        num_beams: Beam-search width forwarded to ``generate``.
        min_len: Minimum summary length forwarded to ``generate``.
        max_len: Maximum summary length forwarded to ``generate``.
        model_in: Name of the summarization model.
        history: Current history table, forwarded to ``update_history``.

    Returns:
        A ``(summary, updated_history)`` tuple.
    """
    gen_text = generate(in_text, num_beams, min_len, max_len, model_in)
    parameters = {
        "num_beams": num_beams,
        "min_len": min_len,
        "max_len": max_len,
    }
    # Keyword arguments on purpose: update_history takes model_in before
    # sum_typ, so passing "Abstractive" positionally first would store the
    # label in the Gen_model column and the model name in Sum_type.
    updated_history = update_history(
        history,
        in_text,
        gen_text,
        model_in=model_in,
        sum_typ="Abstractive",
        parameters=parameters,
    )
    return gen_text, updated_history
def generate_lexrank(in_text, threshold, model_in, ex_sent, language, history):
    """Run extractive (LexRank) summarization and record the run in the history table.

    Args:
        in_text: Text to summarize.
        threshold: Similarity threshold forwarded to ``lex_rank``.
        model_in: Name of the sentence-embedding model.
        ex_sent: Number of sentences to return.
        language: Language forwarded to ``lex_rank``.
        history: Current history table, forwarded to ``update_history``.

    Returns:
        A ``(summary, updated_history)`` tuple.
    """
    gen_text = lex_rank(in_text, threshold, ex_sent, model_in, language)
    parameters = {
        "threshold": threshold,
        "Nr_sent": ex_sent,
        "language": language,
    }
    # Keyword arguments on purpose: update_history takes model_in before
    # sum_typ, so passing "Extractive" positionally first would store the
    # label in the Gen_model column and the model name in Sum_type.
    updated_history = update_history(
        history,
        in_text,
        gen_text,
        model_in=model_in,
        sum_typ="Extractive",
        parameters=parameters,
    )
    return gen_text, updated_history
# Gradio UI: an info accordion, one tab per summarization method, and a shared
# history table that both "Summarize!" buttons append to.
# NOTE(review): the nesting of rows/columns below was reconstructed from
# context because the original indentation was not available — confirm the
# layout against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center> Swedish Summarization Engine! </center></h1>")

    with gr.Accordion("Read here for details about the app", open=False):
        with gr.Tabs():
            with gr.TabItem("The Summarization App"):
                gr.Markdown(sum_app_text_tab_1)
            with gr.TabItem("The Summarization Engine"):
                gr.Markdown(sum_app_text_tab_2)

    with gr.Tabs():
        with gr.TabItem("Abstractive Generation for Summarization"):
            gr.Markdown(
                """The default parameters for this transformer based model work well to generate summarization.
                Use this tab to experiment summarization task of text for different types Abstractive models.""")
            with gr.Row():
                with gr.Column(scale=4):
                    text_baseline_transformer = gr.TextArea(label="Input text to summarize", placeholder="Input summarization")
                    with gr.Row():
                        transformer_button_clear = gr.Button("Clear", variant='secondary')
                        transformer_button = gr.Button("Summarize!", variant='primary')
                with gr.Column(scale=3):
                    with gr.Row():
                        num_beams = gr.Slider(minimum=2, maximum=10, value=2, step=1, label="Number of Beams")
                        min_len = gr.Slider(minimum=10, maximum=50, value=25, step=5, label="Min length")
                        max_len = gr.Slider(minimum=50, maximum=130, value=120, step=10, label="Max length")
                    model_in = gr.Dropdown(["Gabriel/bart-base-cnn-swe", "Gabriel/bart-base-cnn-xsum-swe", "Gabriel/bart-base-cnn-xsum-wiki-swe"], value="Gabriel/bart-base-cnn-xsum-swe", label="Model")
                    output_basline_transformer = gr.Textbox(label="Output Text")
            with gr.Row():
                with gr.Accordion("Here are some examples you can use:", open=False):
                    gr.Markdown("<h3>Press one of the test examples below.</h3>")
                    gr.Markdown("NOTE: First time inference for a new model will take time, since a new model has to downloaded before inference.")
                    gr.Examples(
                        [
                            [abstractive_example_text_1, 5, 25, 120, "Gabriel/bart-base-cnn-swe"],
                            [abstractive_example_text_2, 5, 25, 120, "Gabriel/bart-base-cnn-xsum-swe"],
                        ],
                        [text_baseline_transformer, num_beams, min_len, max_len, model_in],
                    )

        with gr.TabItem("Extractive Ranking Graph for Summarization"):
            gr.Markdown(
                """Use this tab to experiment summarization task of text with a graph based method (LexRank).""")
            with gr.Row():
                with gr.Column(scale=4):
                    text_extract = gr.TextArea(label="Input text to summarize", placeholder="Input text")
                    with gr.Row():
                        extract_button_clear = gr.Button("Clear", variant='secondary')
                        extract_button = gr.Button("Summarize!", variant='primary')
                with gr.Column(scale=3):
                    with gr.Row():
                        ex_sent = gr.Slider(minimum=1, maximum=7, value=4, step=1, label="Sentences to return")
                        ex_threshold = gr.Dropdown(['None', 0.1, 0.2, 0.3, 0.4, 0.5], value='None', label="Similar Threshold")
                        ex_language = gr.Dropdown(["swedish", "english"], value="swedish", label="Language")
                    model_in_ex = gr.Dropdown(["KBLab/sentence-bert-swedish-cased", "sentence-transformers/all-MiniLM-L6-v2"], value="KBLab/sentence-bert-swedish-cased", label="Model")
                    output_extract = gr.Textbox(label="Output Text")
            with gr.Row():
                with gr.Accordion("Here are some examples you can use:", open=False):
                    gr.Markdown("<h3>Press one of the test examples below.</h3>")
                    gr.Markdown("NOTE: First time inference for a new model will take time, since a new model has to downloaded before inference.")
                    gr.Examples(
                        [[extractive_example_text_1, 'None', 4, 'swedish', "KBLab/sentence-bert-swedish-cased"]],
                        [text_extract, ex_threshold, ex_sent, ex_language, model_in_ex],
                    )

    with gr.Box():
        gr.Markdown("<h3> Generation History </h3>")
        # Displays a dataframe with the history of generated summaries, with parameters
        history = gr.Dataframe(headers=["In_text", "Gen_text", "Sum_type", "Gen_model", "Parameters"], overflow_row_behaviour="show_ends", wrap=True)

    # Each "Summarize!" button writes the summary to its tab's output box and
    # replaces the shared history table with the updated one.
    transformer_button.click(generate_transformer, inputs=[text_baseline_transformer, num_beams, min_len, max_len, model_in, history], outputs=[output_basline_transformer, history])
    extract_button.click(generate_lexrank, inputs=[text_extract, ex_threshold, model_in_ex, ex_sent, ex_language, history], outputs=[output_extract, history])

    # The "Clear" buttons previously had no handler; they now empty the input
    # and output text boxes of their own tab (the history is left untouched).
    transformer_button_clear.click(lambda: ("", ""), inputs=None, outputs=[text_baseline_transformer, output_basline_transformer])
    extract_button_clear.click(lambda: ("", ""), inputs=None, outputs=[text_extract, output_extract])

demo.launch()