Jingxiang Mo commited on
Commit
4071dd4
1 Parent(s): 3d68848

Fix flagging

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-39.pyc +0 -0
  2. app.py +3 -9
  3. flagged/log.csv +18 -0
__pycache__/app.cpython-39.pyc CHANGED
Binary files a/__pycache__/app.cpython-39.pyc and b/__pycache__/app.cpython-39.pyc differ
 
app.py CHANGED
@@ -36,7 +36,6 @@ extractor = KeyphraseExtractionPipeline(model=keyPhraseExtractionModel)
36
  model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
37
  tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
38
 
39
- #TODO: add further preprocessing
40
  def keyphrases_extraction(text: str) -> str:
41
  keyphrases = extractor(text)
42
  return keyphrases
@@ -78,12 +77,10 @@ def answer_question(question):
78
  # ======== Tokenize ========
79
  # Apply the tokenizer to the input text, treating them as a text-pair.
80
 
81
-
82
  input_ids = tokenizer.encode(question, context)
83
  question_ids = input_ids[:input_ids.index(tokenizer.sep_token_id)+1]
84
 
85
  # Report how long the input sequence is. If longer than 512 tokens, divide it into multiple sequences
86
-
87
  length_of_group = 512 - len(question_ids)
88
  input_ids_without_question = input_ids[input_ids.index(tokenizer.sep_token_id)+1:]
89
  print(f"Query has {len(input_ids)} tokens, divided in {len(input_ids_without_question)//length_of_group + 1}.\n")
@@ -126,8 +123,6 @@ def answer_question(question):
126
  print(max_start_score)
127
  print(max_end_score)
128
 
129
-
130
-
131
  # ======== Reconstruct Answer ========
132
  # Find the tokens with the highest `start` and `end` scores.
133
 
@@ -161,21 +156,20 @@ def answer_question(question):
161
  return final_answer
162
 
163
  # =====[ DEFINE INTERFACE ]===== #
164
- title = "Azza Q/A Agent"
165
  examples = [
166
  ["Where is the Eiffel Tower?"],
167
  ["What is the population of France?"]
168
  ]
169
- print("hello")
170
  demo = gr.Interface(
171
  title = title,
172
 
173
  fn=answer_question,
174
  inputs = "text",
175
  outputs = "text",
176
-
177
  examples=examples,
 
178
  )
179
 
180
  if __name__ == "__main__":
181
- demo.launch()
 
36
  model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
37
  tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
38
 
 
39
  def keyphrases_extraction(text: str) -> str:
40
  keyphrases = extractor(text)
41
  return keyphrases
 
77
  # ======== Tokenize ========
78
  # Apply the tokenizer to the input text, treating them as a text-pair.
79
 
 
80
  input_ids = tokenizer.encode(question, context)
81
  question_ids = input_ids[:input_ids.index(tokenizer.sep_token_id)+1]
82
 
83
  # Report how long the input sequence is. If longer than 512 tokens, divide it into multiple sequences
 
84
  length_of_group = 512 - len(question_ids)
85
  input_ids_without_question = input_ids[input_ids.index(tokenizer.sep_token_id)+1:]
86
  print(f"Query has {len(input_ids)} tokens, divided in {len(input_ids_without_question)//length_of_group + 1}.\n")
 
123
  print(max_start_score)
124
  print(max_end_score)
125
 
 
 
126
  # ======== Reconstruct Answer ========
127
  # Find the tokens with the highest `start` and `end` scores.
128
 
 
156
  return final_answer
157
 
158
  # =====[ DEFINE INTERFACE ]===== #
159
+ title = "Azza Knowledge Agent"
160
  examples = [
161
  ["Where is the Eiffel Tower?"],
162
  ["What is the population of France?"]
163
  ]
 
164
  demo = gr.Interface(
165
  title = title,
166
 
167
  fn=answer_question,
168
  inputs = "text",
169
  outputs = "text",
 
170
  examples=examples,
171
+ allow_flagging="never",
172
  )
173
 
174
  if __name__ == "__main__":
175
+ demo.launch()
flagged/log.csv CHANGED
@@ -30,3 +30,21 @@ Classical methods look at the frequency, occurrence and order of words
30
  in the text, whereas these neural approaches can capture long-term
31
  semantic dependencies and context of words in a text.",,,,2023-02-25 21:18:38.087039
32
  ,,,,2023-02-25 21:18:40.419138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  in the text, whereas these neural approaches can capture long-term
31
  semantic dependencies and context of words in a text.",,,,2023-02-25 21:18:38.087039
32
  ,,,,2023-02-25 21:18:40.419138
33
+ ,,,,2023-03-25 16:14:25.639391
34
+ ,,,,2023-03-25 16:14:26.687979
35
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:31.944106
36
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:32.552069
37
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:33.042101
38
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:33.371218
39
+ ,,,,2023-03-25 16:22:32.147004
40
+ ,,,,2023-03-25 16:22:41.666675
41
+ ,,,,2023-03-25 16:22:42.128605
42
+ ,,,,2023-03-25 16:22:42.314580
43
+ ,,,,2023-03-25 16:22:42.474549
44
+ ,,,,2023-03-25 16:22:42.639555
45
+ ,,,,2023-03-25 16:22:42.830877
46
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:26:57.282664
47
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:40.137523
48
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:40.939901
49
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:41.198962
50
+ Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:41.365459