PSM272 committed
Commit 6972a16
Parent: 5f71899

Create app.py

Files changed (1)
app.py +62 -0
app.py ADDED
@@ -0,0 +1,62 @@
+ import wikipedia as wiki
+ import torch
+ import gradio as gr
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
+
+ # Load both models once at startup rather than on every request.
+ qa_model_name = "deepset/roberta-base-squad2"
+ nlp = pipeline('question-answering', model=qa_model_name, tokenizer=qa_model_name)
+
+ torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ pegasus_name = "tuner007/pegasus_paraphrase"
+ pegasus_tokenizer = AutoTokenizer.from_pretrained(pegasus_name)
+ pegasus_model = AutoModelForSeq2SeqLM.from_pretrained(pegasus_name).to(torch_device)
+
+
+ def get_sentence(text, pos):
+     # Return the sentence that contains character position pos.
+     start = text.rfind('.', 0, pos) + 1
+     end = text.find('.', pos)
+     if end == -1:
+         end = len(text)
+     return text[start:end].strip()
+
+
+ def get_response(input_text, num_return_sequences, num_beams):
+     # Paraphrase input_text with Pegasus using beam search.
+     batch = pegasus_tokenizer([input_text], truncation=True, padding='longest',
+                               max_length=60, return_tensors="pt").to(torch_device)
+     translated = pegasus_model.generate(**batch, max_length=60, num_beams=num_beams,
+                                         num_return_sequences=num_return_sequences, temperature=1.5)
+     return pegasus_tokenizer.batch_decode(translated, skip_special_tokens=True)
+
+
+ def greet(name):
+     # Treat the user's input as the question, e.g. 'Why is the sky blue?'.
+     question = name
+
+     # Use the top Wikipedia search result as the answering context.
+     # (wiki.page may raise a DisambiguationError for ambiguous titles.)
+     results = wiki.search(question)
+     page = wiki.page(results[0])
+     text = page.content
+
+     # a) Extract the answer span from the article.
+     QA_input = {
+         'question': question,
+         'context': text
+     }
+     res = nlp(QA_input)
+
+     # b) Recover the full sentence around the answer span, then paraphrase it.
+     sentence = get_sentence(text, res['start']) + '.'
+     paraphrase = get_response(sentence, num_return_sequences=1, num_beams=20)[0]
+
+     # Gradio displays the return value, so return the result rather than printing it.
+     return "{'answer': '" + res['answer'] + "', 'text': '" + paraphrase + "'}"
+
+
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+
+ if __name__ == "__main__":
+     demo.launch()
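
For a quick local check without serving the UI, the handler can be called directly. This is a minimal sketch, not part of the commit: it assumes app.py and its dependencies (wikipedia, transformers, torch, gradio) are installed, and both models download on first use.

    # Hypothetical smoke test: import the handler and query it directly.
    # The __main__ guard in app.py keeps demo.launch() from firing on import.
    from app import greet

    print(greet("Why is the sky blue?"))
    # Prints a string shaped like {'answer': '...', 'text': '...'}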