Spaces:

Gabriel
/

Swe_summarizer

Runtime error

App Files Files Community

Swe_summarizer / app.py

Gabriel

Update app.py

dae11ca about 2 years ago

raw

history blame

7.77 kB

	import gradio as gr
	from transformers import pipeline
	import pandas as pd
	import json
	import nltk
	from sentence_transformers import SentenceTransformer, util
	import numpy as np
	from LexRank import *
	from text import *

	nltk.download('punkt')


	def lex_rank(in_text, threshold=None , ex_sent=4 ,model_in = 'KBLab/sentence-bert-swedish-cased', language='swedish' ):
	if threshold == 'None':
	threshold=None

	model = SentenceTransformer(model_in)
	#Split the document into sentences
	sentences = nltk.sent_tokenize(in_text, language=language)

	#Compute the sentence embeddings
	embeddings = model.encode(sentences, convert_to_tensor=True)
	cos_scores = util.cos_sim(embeddings, embeddings).cpu().numpy()

	#Compute the centrality for each sentence
	centrality_scores = degree_centrality_scores(cos_scores, threshold=threshold)

	most_central_sentence_indices = np.argsort(-centrality_scores)
	sent_list= []
	for idx in most_central_sentence_indices[0:ex_sent]:
	sent_list.append(sentences[idx])
	return ' '.join(sent_list)


	def generate(in_text, num_beams, min_len, max_len, model_in):
	print(in_text)
	pipe = pipeline("summarization", model=model_in)
	answer = pipe(in_text, num_beams=num_beams ,min_length=min_len, max_length=max_len)
	print(answer)
	return answer[0]["summary_text"]


	def update_history(df, in_text, gen_text ,model_in, sum_typ, parameters):
	# get rid of first seed phrase
	new_row = [{"In_text": in_text,
	"Gen_text": gen_text,
	"Sum_type": sum_typ ,
	"Gen_model": model_in,
	"Parameters": json.dumps(parameters)}]
	return pd.concat([df, pd.DataFrame(new_row)])

	def generate_transformer(in_text, num_beams, min_len, max_len, model_in, history):
	gen_text= generate(in_text,num_beams, min_len, max_len, model_in)
	return gen_text, update_history(history, in_text, gen_text, "Abstractive" ,model_in, {"num_beams": num_beams,
	"min_len": min_len,
	"max_len": max_len})

	def generate_lexrank(in_text, threshold, model_in, ex_sent ,language, history):
	gen_text= lex_rank(in_text, threshold, ex_sent ,model_in, language)
	return gen_text, update_history(history, in_text, gen_text, "Extractive" ,model_in, {"threshold": threshold,
	"Nr_sent": ex_sent,
	"language": language})

	with gr.Blocks() as demo:
	gr.Markdown("<h1><center> Swedish Summarization Engine! </center></h1>")
	with gr.Accordion("Read here for details about the app", open=False):

	with gr.Tabs():
	with gr.TabItem("The Summarization App"):
	gr.Markdown(sum_app_text_tab_1)

	with gr.TabItem("The Summarization Engine"):
	gr.Markdown(sum_app_text_tab_2)

	with gr.Tabs():
	with gr.TabItem("Abstractive Generation for Summarization"):
	gr.Markdown(
	"""The default parameters for this transformer based model work well to generate summarization.
	Use this tab to experiment summarization task of text for different types Abstractive models.""")
	with gr.Row():
	with gr.Column(scale=4):
	text_baseline_transformer= gr.TextArea(label="Input text to summarize", placeholder="Input summarization")

	with gr.Row():
	transformer_button_clear = gr.Button("Clear", variant='secondary')
	transformer_button = gr.Button("Summarize!", variant='primary')

	with gr.Column(scale=3):
	with gr.Row():
	num_beams = gr.Slider(minimum=2, maximum=10, value=2, step=1, label="Number of Beams")
	min_len = gr.Slider(minimum=10, maximum=50, value=25, step=5, label="Min length")
	max_len = gr.Slider(minimum=50, maximum=130, value=120, step=10, label="Max length")
	model_in = gr.Dropdown(["Gabriel/bart-base-cnn-swe", "Gabriel/bart-base-cnn-xsum-swe", "Gabriel/bart-base-cnn-xsum-wiki-swe"], value="Gabriel/bart-base-cnn-xsum-swe", label="Model")
	output_basline_transformer = gr.Textbox(label="Output Text")

	with gr.Row():
	with gr.Accordion("Here are some examples you can use:", open=False):
	gr.Markdown("<h3>Press one of the test examples below.<h3>")
	gr.Markdown("NOTE: First time inference for a new model will take time, since a new model has to downloaded before inference.")
	gr.Examples([[abstractive_example_text_1
	, 5,25,120, "Gabriel/bart-base-cnn-swe"],
	[abstractive_example_text_2
	, 5,25,120, "Gabriel/bart-base-cnn-xsum-swe"]
	], [text_baseline_transformer, num_beams, min_len, max_len, model_in])

	with gr.TabItem("Extractive Ranking Graph for Summarization"):
	gr.Markdown(
	"""Use this tab to experiment summarization task of text with a graph based method (LexRank).""")
	with gr.Row():
	with gr.Column(scale=4):
	text_extract= gr.TextArea(label="Input text to summarize", placeholder="Input text")
	with gr.Row():
	extract_button_clear = gr.Button("Clear", variant='secondary')
	extract_button = gr.Button("Summarize!", variant='primary')
	with gr.Column(scale=3):
	with gr.Row():
	ex_sent =gr.Slider(minimum=1, maximum=7, value=4, step=1, label="Sentences to return")
	ex_threshold = gr.Dropdown(['None',0.1,0.2,0.3,0.4,0.5], value='None', label="Similar Threshold")
	ex_language = gr.Dropdown(["swedish","english"], value="swedish", label="Language")
	model_in_ex = gr.Dropdown(["KBLab/sentence-bert-swedish-cased","sentence-transformers/all-MiniLM-L6-v2"], value="KBLab/sentence-bert-swedish-cased", label="Model")
	output_extract = gr.Textbox(label="Output Text")

	with gr.Row():
	with gr.Accordion("Here are some examples you can use:", open=False):
	gr.Markdown("<h3>Press one of the test examples below.<h3>")
	gr.Markdown("NOTE: First time inference for a new model will take time, since a new model has to downloaded before inference.")
	gr.Examples([[extractive_example_text_1
	, 'None', 4,'swedish', "KBLab/sentence-bert-swedish-cased"]], [text_extract, ex_threshold, ex_sent ,ex_language, model_in_ex])

	with gr.Box():
	gr.Markdown("<h3> Generation History <h3>")
	# Displays a dataframe with the history of moves generated, with parameters
	history = gr.Dataframe(headers=["In_text", "Gen_text","Sum_type" ,"Gen_model", "Parameters"], overflow_row_behaviour="show_ends", wrap=True)

	transformer_button.click(generate_transformer, inputs=[text_baseline_transformer, num_beams, min_len, max_len, model_in ,history], outputs=[output_basline_transformer , history] )
	extract_button.click(generate_lexrank, inputs=[text_extract, ex_threshold, model_in_ex, ex_sent ,ex_language ,history], outputs=[output_extract , history] )


	demo.launch()