# Spaces:
# Runtime error
# Runtime error
# Initialize the space: load every summarization model once, at import time,
# so the request handlers below can reuse them.
# The import has to happen here: the function-local imports further down run
# only when those functions are called, which is too late for module-level code.
from transformers import PegasusForConditionalGeneration, pipeline

# facebook/bart-large-cnn pipeline, used by YTVideoToText.
summarizeryt = pipeline("summarization", model="facebook/bart-large-cnn")
# Summarization pipeline with the library's default model, used by
# postSummaryWithBart.
summarizerbg = pipeline("summarization")
# google/pegasus-xsum model, used by abstractiveSummaryWithPegasus.
summarizertx = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Fetch a YouTube video's transcript and summarize it chunk by chunk.

    The transcript is cut into consecutive 1000-character slices; each slice
    is summarized with the module-level ``summarizeryt`` pipeline and the
    partial summaries are joined with single spaces.

    Args:
        video_link: URL of the video (``watch?v=``, ``youtu.be/``, ``embed/``,
            ``shorts/``, ``live/`` or ``v/`` forms).

    Returns:
        A two-item list ``[transcript, summary]``, both plain strings.

    Raises:
        ValueError: if no video id can be found in ``video_link``.
    """
    # Validate the link first so a bad URL fails before any network access.
    video_id = _extract_video_id(video_link)

    from youtube_transcript_api import YouTubeTranscriptApi

    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    result = " ".join(entry["text"] for entry in transcript)

    chunk_size = 1000
    summaries = []
    # Stepping through the text by chunk_size never yields an empty trailing
    # slice, so the model is not asked to summarize nothing.
    for start in range(0, len(result), chunk_size):
        chunk = result[start:start + chunk_size]
        if not chunk.strip():
            continue
        out = summarizeryt(chunk, max_new_tokens=130, min_length=30, do_sample=False)
        summaries.append(out[0]["summary_text"])

    return [result, " ".join(summaries)]


def _extract_video_id(video_link):
    """Return the video id found in a YouTube URL, or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse((video_link or "").strip())
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    candidate = ""
    if host in ("youtu.be", "www.youtu.be"):
        # Short links carry the id as the first path segment.
        if path_parts:
            candidate = path_parts[0]
    else:
        # Standard links carry the id in the "v" query parameter; parsing the
        # query keeps extra parameters such as "&t=10s" out of the id.
        candidate = parse_qs(parsed.query).get("v", [""])[0]
        if not candidate and len(path_parts) >= 2:
            if path_parts[0] in ("embed", "shorts", "live", "v"):
                candidate = path_parts[1]

    if not candidate:
        raise ValueError(f"Could not find a YouTube video id in {video_link!r}")
    return candidate
# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Download a web page and summarize the text of its <h1> and <p> tags.

    The text is split into sentences, the sentences are packed into chunks of
    at most 500 words, and the chunks are summarized with the module-level
    ``summarizerbg`` pipeline.

    Args:
        blog_link: URL of the page to summarize.

    Returns:
        A two-item list ``[sentences, summary]`` where ``sentences`` is the
        list of sentence strings and ``summary`` is a single string.
    """
    from bs4 import BeautifulSoup
    import requests

    # getting our blog post; the timeout keeps a stalled server from hanging
    # the request handler forever
    r = requests.get(blog_link, timeout=30)
    soup = BeautifulSoup(r.text, 'html.parser')
    article = ' '.join(node.text for node in soup.find_all(['h1', 'p']))

    # Tag every sentence-ending punctuation mark with an end-of-sentence
    # marker, then split on the marker so the punctuation stays attached to
    # its sentence.
    eos = '<eos>'
    for mark in ('.', '?', '!'):
        article = article.replace(mark, mark + eos)
    sentences = article.split(eos)

    # chunking text: keep appending sentences to the current chunk until it
    # would exceed max_chunk words, then start a new chunk
    max_chunk = 500
    chunks = []
    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue
        if chunks and len(chunks[-1]) + len(words) <= max_chunk:
            chunks[-1].extend(words)
        else:
            chunks.append(words)

    # NOTE(review): `sentences` is returned as a list while YTVideoToText
    # returns its transcript as a string — confirm what the caller expects.
    if not chunks:
        # Nothing to summarize (page without text); skip the model call.
        return [sentences, ""]

    # summarizing text
    res = summarizerbg(
        [' '.join(words) for words in chunks],
        max_new_tokens=1024,
        min_length=30,
        do_sample=False,
    )
    # Separate the per-chunk summaries with a space so they do not run together.
    text = ' '.join(summ['summary_text'] for summ in res)
    return [sentences, text]
# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Summarize free text with the module-level Pegasus model.

    Args:
        words: Text to summarize. It is truncated by the tokenizer when it is
            longer than the model accepts.

    Returns:
        The generated summary as a string, or ``""`` when ``words`` is empty
        or contains only whitespace.
    """
    if not words or not words.strip():
        return ""

    tokenizer = _get_pegasus_tokenizer()
    # perform summarization
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary = summarizertx.generate(**tokens)
    # skip_special_tokens drops markers such as <pad> and </s> from the text.
    return tokenizer.decode(summary[0], skip_special_tokens=True)


# Tokenizer instance shared by all calls; filled on first use.
_PEGASUS_TOKENIZER = None


def _get_pegasus_tokenizer():
    """Load the google/pegasus-xsum tokenizer once and reuse it afterwards."""
    global _PEGASUS_TOKENIZER
    if _PEGASUS_TOKENIZER is None:
        from transformers import PegasusTokenizer

        _PEGASUS_TOKENIZER = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    return _PEGASUS_TOKENIZER
# Main logic of the program
def process(uri, mode):
    """Summarize ``uri`` with the summarizer selected by ``mode``.

    Args:
        uri: Address of the resource to summarize.
        mode: ``"Youtube"`` to call YTVideoToText, ``"Blog"`` to call
            postSummaryWithBart.

    Returns:
        Whatever the selected summarizer returns.

    Raises:
        ValueError: if ``mode`` is neither ``"Youtube"`` nor ``"Blog"``.
    """
    if mode not in ("Youtube", "Blog"):
        raise ValueError("Invalid mode")
    if mode == "Youtube":
        return YTVideoToText(uri)
    return postSummaryWithBart(uri)
import gradio as gr
# NOTE(review): `Series` is not referenced in the visible code, and
# `gradio.mix` appears to have been removed in Gradio 4 — confirm against the
# installed Gradio version before relying on (or dropping) this import.
from gradio.mix import Series

# Build the page: three text boxes (URI, transcript, summary) in one column,
# the mode selector and the two action buttons in another.
# NOTE(review): the nesting below is reconstructed from statement order; the
# original indentation was not available — confirm the intended layout.
with gr.Blocks() as ui:
    gr.Markdown("""
## Permet de faire le résumé d'une video youtube ou d'un article de blog
""")
    with gr.Row():
        with gr.Column():
            # `api_name` is an event-listener argument (see the .click() calls
            # below), not a component argument, so it is not passed to the
            # components themselves.
            URI = gr.Textbox(
                label="URI à résumer",
                max_lines=1,
                placeholder="https://youtube|website.ext",
            )
            TRANSCRIPT = gr.Textbox(
                label="Transcript à résumer",
                lines=10,
                placeholder="https://youtube|website.ext",
            )
            RESUME = gr.Textbox(
                label="Résumé",
                lines=10,
                interactive=False,
                placeholder="https://youtube|website.ext",
            )
        with gr.Column():
            MODE = gr.Radio(
                label="Mode pour URI",
                choices=["Youtube", "Blog"],
            )
            # Fetch and summarize the resource at URI; fills both the
            # transcript box and the summary box.
            gr.Button("Process URI").click(
                fn=process,
                inputs=[URI, MODE],
                outputs=[TRANSCRIPT, RESUME],
                api_name="process_uri",
            )
            # Summarize whatever text is currently in the transcript box.
            gr.Button("Process TEXT").click(
                fn=abstractiveSummaryWithPegasus,
                inputs=[TRANSCRIPT],
                outputs=[RESUME],
                api_name="process_text",
            )
#translator_fr = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-fr-en")
#summarizer = gr.Interface.load("huggingface/sshleifer/distilbart-cnn-12-6")
ui.launch()