|
import functools
import re

import gradio
import requests
import torch
from bs4 import BeautifulSoup
from transformers import pipeline
|
|
|
# Maximum number of words packed into one chunk before a new chunk is started.
_MAX_CHUNK_WORDS = 500

# Matches the whitespace that follows sentence-ending punctuation.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model="stevhliu/my_awesome_billsum_model")


def _chunk_sentences(sentences, max_words):
    """Greedily pack whole sentences into chunks of at most *max_words* words.

    A single sentence longer than *max_words* is kept intact as its own chunk
    rather than being split mid-sentence.
    """
    chunks = []
    current = []
    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue
        # Close the running chunk when this sentence would push it over the cap.
        if current and len(current) + len(words) > max_words:
            chunks.append(" ".join(current))
            current = []
        current.extend(words)
    if current:
        chunks.append(" ".join(current))
    return chunks


def SUMMARIZE(Url):
    """Fetch the page at *Url* and return a summary of its text.

    The text of every ``<h1>`` and ``<p>`` element is collected, split into
    sentences, grouped into chunks of at most ``_MAX_CHUNK_WORDS`` words, and
    each chunk is summarized. The per-chunk summaries are joined with spaces.

    Args:
        Url: Address of the article to summarize. Surrounding whitespace is
            ignored.

    Returns:
        The combined summary as a single string, or an empty string when the
        page contains no heading or paragraph text.

    Raises:
        ValueError: If *Url* is not a string or is blank.
        requests.RequestException: If the page cannot be fetched or the
            server answers with an error status.
    """
    # Validate before doing any expensive work (model load, network request).
    url = Url.strip() if isinstance(Url, str) else ""
    if not url:
        raise ValueError("A non-empty article URL is required.")

    # A timeout keeps a stalled server from hanging the request forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    passages = [
        tag.get_text(" ", strip=True) for tag in soup.find_all(["h1", "p"])
    ]
    # Join with a space so words from adjacent elements are not fused together.
    article = " ".join(passage for passage in passages if passage)

    chunks = _chunk_sentences(_SENTENCE_BOUNDARY.split(article), _MAX_CHUNK_WORDS)
    if not chunks:
        return ""

    # truncation=True asks the pipeline to clip any chunk that is still longer
    # than the model's maximum input length.
    results = _get_summarizer()(
        chunks,
        max_length=120,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return " ".join(result["summary_text"] for result in results)
|
|
|
|
|
# Gradio front end: a URL text area feeding SUMMARIZE, with the result shown
# in a second text area.
url_box = gradio.TextArea(
    lines=2,
    value="https://medium.com/analytics-vidhya/openai-gpt-3-language-models-are-few-shot-learners-82531b3d3122",
)
summary_box = gradio.TextArea()

interface = gradio.Interface(fn=SUMMARIZE, inputs=url_box, outputs=summary_box)

# share=True is passed through to launch() unchanged.
interface.launch(share=True)