# Hugging Face file-page header (not part of the program): app.py, uploaded by sakthi07, revision 0493811, 2.75 kB.
import gradio as gr
import boto3
from botocore.exceptions import ClientError
import requests
import json
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
def get_secret(secret_name="NYTimesArticleAPI", region_name="ap-south-1"):
    """Fetch a secret from AWS Secrets Manager and return it parsed as JSON.

    Args:
        secret_name: Identifier of the secret to read. Defaults to the
            secret this app uses.
        region_name: AWS region the Secrets Manager client is created for.

    Returns:
        The object obtained by JSON-decoding the secret's ``SecretString``.

    Raises:
        botocore.exceptions.ClientError: Propagated unchanged from
            ``get_secret_value`` when the lookup fails.
    """
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name,
    )
    # No try/except here: catching ClientError only to re-raise it added
    # nothing, so the error is simply allowed to propagate to the caller.
    secret_value = client.get_secret_value(SecretId=secret_name)
    return json.loads(secret_value['SecretString'])
def get_api():
    """Return the value stored under 'ny_times_article_api' in the secret."""
    return get_secret()['ny_times_article_api']
def get_abstracts(query):
    """Search the NYT Article Search API and collect the article abstracts.

    Args:
        query: Free-text search term entered by the user.

    Returns:
        A list with the non-empty ``abstract`` of every document in the
        response; empty when the response carries no documents.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds the timeout.
    """
    # Pass the pieces as ``params`` so requests URL-encodes them; a query
    # containing '&', '#', '+' or spaces can then no longer corrupt the URL
    # or smuggle in extra parameters.
    params = {
        'q': query,
        'fq': 'source:("The New York Times")',
        'api-key': get_api(),
    }
    response = requests.get(
        'https://api.nytimes.com/svc/search/v2/articlesearch.json',
        params=params,
        timeout=30,  # never let a stalled connection hang the UI forever
    )
    payload = response.json()
    # ``or`` fallbacks tolerate both a missing key and an explicit null.
    docs = (payload.get('response') or {}).get('docs') or []
    return [doc['abstract'] for doc in docs if doc.get('abstract')]
_SUMMARIZER_CHECKPOINT = "stevhliu/my_awesome_billsum_model"
_summarizer_cache = {}


def _load_summarizer():
    """Return the (tokenizer, model) pair, loading both on first use only."""
    if not _summarizer_cache:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZER_CHECKPOINT)
        model = TFAutoModelForSeq2SeqLM.from_pretrained(
            _SUMMARIZER_CHECKPOINT, from_pt=True
        )
        # Store both together so a failed load never leaves a half-filled cache.
        _summarizer_cache.update(tokenizer=tokenizer, model=model)
    return _summarizer_cache["tokenizer"], _summarizer_cache["model"]


def summarizer(query):
    """Summarize the NYT abstracts found for ``query``.

    Args:
        query: Search term forwarded to ``get_abstracts``.

    Returns:
        A ``(abstracts, summary)`` tuple: the list of abstracts that were
        found and the generated summary text. When no abstracts are found
        the summary is an empty string and the model is not invoked.
    """
    abstracts = get_abstracts(query)
    if not abstracts:
        # Nothing to summarize; skip loading/running the model on empty text.
        return abstracts, ""

    # Reuse the tokenizer and model across requests instead of reloading
    # them on every call.
    tokenizer, model = _load_summarizer()

    input_text = ' '.join(abstracts)
    input_ids = tokenizer(input_text, return_tensors="tf").input_ids
    outputs = model.generate(input_ids, max_length=100, do_sample=False)
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return abstracts, summary
# Gradio UI: one query textbox in; the collected abstracts and their summary out.
# gr.Textbox is used for inputs and outputs alike because the gr.inputs /
# gr.outputs namespaces are deprecated in Gradio 3 and removed in Gradio 4.
iface = gr.Interface(
    fn=summarizer,
    inputs=gr.Textbox(placeholder="Enter your query"),
    outputs=[
        gr.Textbox(label="Abstracts"),
        gr.Textbox(label="Summary"),
    ],
    title="New York Times Articles Summarizer",
    description="This summarizer actually does not yet summarize New York Times articles because of certain limitations. Type in something like 'Manipur' or 'Novak Djokovic' you will get a summary of that topic. What actually happens is that the query goes through the API. The abstract of article's content is added or concatenated, and then a text of considerable length is generated. That text is then summarized. So, this is an article summarizer but summarizes only abstracts of a particular article, ensuring that readers get the essence of a topic. This is a successful implementation of a pretrained T5 Transformer model.",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()