Mary12 committed on
Commit
163cb0f
1 Parent(s): d5c2598

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -3,7 +3,18 @@ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
3
  import torch
4
  import transformers
5
  from transformers import pipeline
 
 
6
 
 
 
 
 
 
 
 
 
 
7
  def model(model_name):
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
  model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
@@ -20,10 +31,8 @@ def qa_result(context, question):
20
  pipe = model(model_name)
21
  result = pipe(question = question, context=context)
22
  answered = result['answer']
23
- if answered[0].islower():
24
- answered[0] = answered[0].upper()
25
- # answered = answered.replace(answered[len(answered)-1], ":")
26
- return answered
27
 
28
  theme = gr.themes.Soft().set(
29
  body_background_fill='*background_fill_secondary',
 
3
  import torch
4
  import transformers
5
  from transformers import pipeline
6
+ import re
7
+ import html
8
 
9
def remove_references(text):
    """Strip citation markers and bracketed links from *text*.

    HTML entities are decoded first, so entity-encoded markers such as
    ``&#91;1&#93;`` are removed along with their literal ``[1]`` form.
    Numeric references (``[12]``) and bracketed URLs, with or without a
    trailing label, are then dropped, and every run of whitespace is
    collapsed to a single space.

    Args:
        text: Raw text that may contain HTML entities and references.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    # Decode entities before matching; otherwise escaped brackets would only
    # turn into real "[n]" markers after the stripping passes had already run.
    text = html.unescape(text)
    text = re.sub(r'\[\d+\]', '', text)  # numeric reference, e.g. [3]
    text = re.sub(r'\[https?://[^\[\]]+\s[^\[\]]+\]', '', text)  # [url label]
    text = re.sub(r'\[https?://[^\[\]]+\]', '', text)  # bare [url]
    return re.sub(r'\s+', ' ', text).strip()  # normalise whitespace
16
+
17
+
18
  def model(model_name):
19
  tokenizer = AutoTokenizer.from_pretrained(model_name)
20
  model = AutoModelForQuestionAnswering.from_pretrained(model_name,return_dict = False)
 
31
  pipe = model(model_name)
32
  result = pipe(question = question, context=context)
33
  answered = result['answer']
34
+ text = remove_references(answered)
35
+ return text.capitalize()
 
 
36
 
37
  theme = gr.themes.Soft().set(
38
  body_background_fill='*background_fill_secondary',