|
import re
from functools import lru_cache

import gradio as gr
from gradio.mix import Parallel
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
|
|
|
|
|
def clean_text(text):
    """Normalize raw commentary text before tokenization.

    Drops non-ASCII characters, URLs and "ADVERTISEMENT" markers left
    over from article scraping, then collapses all whitespace runs
    (newlines, tabs, repeated spaces) into single spaces.

    Args:
        text: Raw input string.

    Returns:
        A cleaned, single-spaced, stripped ASCII string.
    """
    # Remove any characters that cannot be encoded as ASCII.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Remove URLs (scheme through the next whitespace).
    text = re.sub(r"http\S+", "", text)
    # Remove advertisement markers embedded in scraped article text.
    text = re.sub(r"ADVERTISEMENT", " ", text)
    # Collapse every whitespace run into a single space. The original
    # substituted "\n", "\n\n" and "\t" separately and the "\n\n" pass
    # was dead code (single newlines were already gone); r"\s+" covers
    # all of those in one pass.
    text = re.sub(r"\s+", " ", text)
    return text.strip()
|
|
|
|
|
modchoice_1 = "chinhon/pegasus-large-commentaries_hd"


@lru_cache(maxsize=1)
def _load_pegasus_large():
    """Load and cache the pegasus-large tokenizer/model pair.

    The original reloaded both from the hub on every request; caching
    makes subsequent calls fast while keeping the first call identical.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_1)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)
    return tokenizer, model


def commentaries_headline1(text):
    """Generate a headline for *text* with the fine-tuned pegasus-large model.

    Args:
        text: Raw commentary text; normalized via ``clean_text`` first.

    Returns:
        The first decoded headline string produced by the model.
    """
    input_text = clean_text(text)

    tokenizer_1, model_1 = _load_pegasus_large()

    # Tokenize the *source* text directly. The original wrapped this in
    # ``tokenizer.as_target_tokenizer()``, which is meant only for
    # preparing target labels during training (and is deprecated); for
    # Pegasus the source and target tokenizers coincide, so decoded
    # output is unchanged.
    batch = tokenizer_1(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_1.generate(**batch)

    summary_1 = tokenizer_1.batch_decode(translated, skip_special_tokens=True)

    return summary_1[0]
|
|
|
|
|
# Gradio interface wrapping the pegasus-large headline generator.
_label_1 = " | Model: Fine tuned pegasus-large"
headline1 = gr.Interface(
    fn=commentaries_headline1,
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=_label_1),
)
|
|
|
modchoice_2 = "chinhon/pegasus-multi_news-commentaries_hdwriter"


@lru_cache(maxsize=1)
def _load_pegasus_multi_news():
    """Load and cache the pegasus-multi_news tokenizer/model pair.

    The original reloaded both from the hub on every request; caching
    makes subsequent calls fast while keeping the first call identical.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_2)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)
    return tokenizer, model


def commentaries_headline2(text):
    """Generate a headline for *text* with the fine-tuned pegasus-multi_news model.

    Args:
        text: Raw commentary text; normalized via ``clean_text`` first.

    Returns:
        The first decoded headline string produced by the model.
    """
    input_text = clean_text(text)

    tokenizer_2, model_2 = _load_pegasus_multi_news()

    # Tokenize the *source* text directly. The original wrapped this in
    # ``tokenizer.as_target_tokenizer()``, which is meant only for
    # preparing target labels during training (and is deprecated); for
    # Pegasus the source and target tokenizers coincide, so decoded
    # output is unchanged.
    batch = tokenizer_2(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_2.generate(**batch)

    summary_2 = tokenizer_2.batch_decode(translated, skip_special_tokens=True)

    return summary_2[0]
|
|
|
# Gradio interface wrapping the pegasus-multi_news headline generator.
_label_2 = " | Model: Fine tuned pegasus-multi_news"
headline2 = gr.Interface(
    fn=commentaries_headline2,
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=_label_2),
)
|
|
|
|
|
modchoice_3 = "chinhon/bart-large-commentaries_hdwriter"


@lru_cache(maxsize=1)
def _load_bart_large():
    """Load and cache the bart-large tokenizer/model pair.

    The original reloaded both from the hub on every request; caching
    makes subsequent calls fast while keeping the first call identical.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_3)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_3)
    return tokenizer, model


def commentaries_headline3(text):
    """Generate a headline for *text* with the fine-tuned bart-large model.

    Args:
        text: Raw commentary text; normalized via ``clean_text`` first.

    Returns:
        The first decoded headline string produced by the model.
    """
    input_text = clean_text(text)

    tokenizer_3, model_3 = _load_bart_large()

    # Tokenize the *source* text directly. The original wrapped this in
    # ``tokenizer.as_target_tokenizer()``, which is meant only for
    # preparing target labels during training (and is deprecated); for
    # BART the source and target tokenizers coincide, so decoded output
    # is unchanged.
    batch = tokenizer_3(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    # The original passed ``max_length=100`` to ``batch_decode``, where
    # fast tokenizers silently ignore it; the evident intent — capping
    # generation length — belongs on ``generate``.
    translated = model_3.generate(**batch, max_length=100)

    summary_3 = tokenizer_3.batch_decode(translated, skip_special_tokens=True)

    return summary_3[0]
|
|
|
|
|
# Gradio interface wrapping the bart-large headline generator.
_label_3 = " | Model: Fine tuned bart-large"
headline3 = gr.Interface(
    fn=commentaries_headline3,
    inputs=gr.inputs.Textbox(),
    outputs=gr.outputs.Textbox(label=_label_3),
)
|
|
|
|
|
# Run all three headline generators side-by-side on the same input and
# serve the combined app.
_commentary_input = gr.inputs.Textbox(
    lines=20,
    label="Paste parts of your commentary here, and choose from 3 suggested headlines",
)
_app = Parallel(
    headline1,
    headline2,
    headline3,
    title="Commentaries Headlines Generator",
    inputs=_commentary_input,
    theme="huggingface",
)
_app.launch(enable_queue=True)
|
|