sakthi07 committed on
Commit
0493811
1 Parent(s): 57a1596

uploaded app.py

Files changed (1)
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
+ import gradio as gr
+ import boto3
+ from botocore.exceptions import ClientError
+ import requests
+ import json
+ from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
+
+ def get_secret():
+     secret_name = "NYTimesArticleAPI"
+     region_name = "ap-south-1"
+     session = boto3.session.Session()
+     client = session.client(
+         service_name='secretsmanager',
+         region_name=region_name
+     )
+     try:
+         get_secret_value_response = client.get_secret_value(
+             SecretId=secret_name
+         )
+     except ClientError as e:
+         raise e
+     secret = get_secret_value_response['SecretString']
+     secret_dict = json.loads(secret)
+     return secret_dict
+
+ def get_api():
+     api_key_dict = get_secret()
+     api_key_value = api_key_dict['ny_times_article_api']
+     return api_key_value
+
+ def get_abstracts(query):
+     api_key = get_api()
+     url = f'https://api.nytimes.com/svc/search/v2/articlesearch.json?q={query}&fq=source:("The New York Times")&api-key={api_key}'
+     response = requests.get(url).json()
+     abstracts = []
+     docs = response.get('response', {}).get('docs', [])
+     for doc in docs:
+         abstract = doc.get('abstract', '')
+         if abstract:
+             abstracts.append(abstract)
+     return abstracts
+
+ def summarizer(query):
+     abstracts = get_abstracts(query)
+     input_text = ' '.join(abstracts)
+
+     tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_billsum_model")
+     inputs = tokenizer(input_text, return_tensors="tf").input_ids
+
+     model = TFAutoModelForSeq2SeqLM.from_pretrained("stevhliu/my_awesome_billsum_model", from_pt=True)
+     outputs = model.generate(inputs, max_length=100, do_sample=False)
+
+     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return abstracts, summary
+
+ iface = gr.Interface(
+     fn=summarizer,
+     inputs=gr.inputs.Textbox(placeholder="Enter your query"),
+     # outputs=gr.outputs.Textbox(),
+     outputs=[
+         gr.outputs.Textbox(label="Abstracts"),
+         gr.outputs.Textbox(label="Summary")
+     ],
+     title="New York Times Articles Summarizer",
+     description="This summarizer does not yet summarize full New York Times articles because of certain limitations. Type in something like 'Manipur' or 'Novak Djokovic' and you will get a summary of that topic. What actually happens is that the query goes through the Article Search API, the abstracts of the matching articles are concatenated into a text of considerable length, and that text is then summarized. So this is an article summarizer that summarizes only the abstracts of the matching articles, giving readers the essence of a topic. This is a successful implementation of a pretrained T5 Transformer model."
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
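For reference, a minimal way to exercise the pipeline outside the Gradio UI is to call the functions in app.py directly. This is only a sketch: it assumes app.py is importable from the working directory and that AWS credentials able to read the NYTimesArticleAPI secret (and a valid NYT Article Search key inside it) are already configured in the environment.

# Quick local check of the query -> abstracts -> summary pipeline.
# Assumes AWS credentials for the "NYTimesArticleAPI" secret are configured
# (e.g. via environment variables or ~/.aws/credentials).
from app import get_abstracts, summarizer

abstracts = get_abstracts("Novak Djokovic")   # raw abstracts from the Article Search API
print(len(abstracts), "abstracts fetched")

abstracts, summary = summarizer("Novak Djokovic")  # concatenates abstracts, then summarizes
print(summary)

Importing app rather than running it keeps the `if __name__ == "__main__"` guard from launching the interface, so only the functions are exercised.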