Spaces:
Runtime error
Runtime error
Jingxiang Mo
committed on
Commit
•
4071dd4
1
Parent(s):
3d68848
Fix flagging
Browse files- __pycache__/app.cpython-39.pyc +0 -0
- app.py +3 -9
- flagged/log.csv +18 -0
__pycache__/app.cpython-39.pyc
CHANGED
Binary files a/__pycache__/app.cpython-39.pyc and b/__pycache__/app.cpython-39.pyc differ
|
|
app.py
CHANGED
@@ -36,7 +36,6 @@ extractor = KeyphraseExtractionPipeline(model=keyPhraseExtractionModel)
|
|
36 |
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
|
37 |
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
|
38 |
|
39 |
-
#TODO: add further preprocessing
|
40 |
def keyphrases_extraction(text: str) -> str:
|
41 |
keyphrases = extractor(text)
|
42 |
return keyphrases
|
@@ -78,12 +77,10 @@ def answer_question(question):
|
|
78 |
# ======== Tokenize ========
|
79 |
# Apply the tokenizer to the input text, treating them as a text-pair.
|
80 |
|
81 |
-
|
82 |
input_ids = tokenizer.encode(question, context)
|
83 |
question_ids = input_ids[:input_ids.index(tokenizer.sep_token_id)+1]
|
84 |
|
85 |
# Report how long the input sequence is. if longer than 512 tokens divide it multiple sequences
|
86 |
-
|
87 |
length_of_group = 512 - len(question_ids)
|
88 |
input_ids_without_question = input_ids[input_ids.index(tokenizer.sep_token_id)+1:]
|
89 |
print(f"Query has {len(input_ids)} tokens, divided in {len(input_ids_without_question)//length_of_group + 1}.\n")
|
@@ -126,8 +123,6 @@ def answer_question(question):
|
|
126 |
print(max_start_score)
|
127 |
print(max_end_score)
|
128 |
|
129 |
-
|
130 |
-
|
131 |
# ======== Reconstruct Answer ========
|
132 |
# Find the tokens with the highest `start` and `end` scores.
|
133 |
|
@@ -161,21 +156,20 @@ def answer_question(question):
|
|
161 |
return final_answer
|
162 |
|
163 |
# =====[ DEFINE INTERFACE ]===== #'
|
164 |
-
title = "Azza
|
165 |
examples = [
|
166 |
["Where is the Eiffel Tower?"],
|
167 |
["What is the population of France?"]
|
168 |
]
|
169 |
-
print("hello")
|
170 |
demo = gr.Interface(
|
171 |
title = title,
|
172 |
|
173 |
fn=answer_question,
|
174 |
inputs = "text",
|
175 |
outputs = "text",
|
176 |
-
|
177 |
examples=examples,
|
|
|
178 |
)
|
179 |
|
180 |
if __name__ == "__main__":
|
181 |
-
demo.launch()
|
|
|
36 |
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
|
37 |
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
|
38 |
|
|
|
39 |
def keyphrases_extraction(text: str) -> str:
|
40 |
keyphrases = extractor(text)
|
41 |
return keyphrases
|
|
|
77 |
# ======== Tokenize ========
|
78 |
# Apply the tokenizer to the input text, treating them as a text-pair.
|
79 |
|
|
|
80 |
input_ids = tokenizer.encode(question, context)
|
81 |
question_ids = input_ids[:input_ids.index(tokenizer.sep_token_id)+1]
|
82 |
|
83 |
# Report how long the input sequence is. if longer than 512 tokens divide it multiple sequences
|
|
|
84 |
length_of_group = 512 - len(question_ids)
|
85 |
input_ids_without_question = input_ids[input_ids.index(tokenizer.sep_token_id)+1:]
|
86 |
print(f"Query has {len(input_ids)} tokens, divided in {len(input_ids_without_question)//length_of_group + 1}.\n")
|
|
|
123 |
print(max_start_score)
|
124 |
print(max_end_score)
|
125 |
|
|
|
|
|
126 |
# ======== Reconstruct Answer ========
|
127 |
# Find the tokens with the highest `start` and `end` scores.
|
128 |
|
|
|
156 |
return final_answer
|
157 |
|
158 |
# =====[ DEFINE INTERFACE ]===== #'
|
159 |
+
title = "Azza Knowledge Agent"
|
160 |
examples = [
|
161 |
["Where is the Eiffel Tower?"],
|
162 |
["What is the population of France?"]
|
163 |
]
|
|
|
164 |
demo = gr.Interface(
|
165 |
title = title,
|
166 |
|
167 |
fn=answer_question,
|
168 |
inputs = "text",
|
169 |
outputs = "text",
|
|
|
170 |
examples=examples,
|
171 |
+
allow_flagging="never",
|
172 |
)
|
173 |
|
174 |
if __name__ == "__main__":
|
175 |
+
demo.launch()
|
flagged/log.csv
CHANGED
@@ -30,3 +30,21 @@ Classical methods look at the frequency, occurrence and order of words
|
|
30 |
in the text, whereas these neural approaches can capture long-term
|
31 |
semantic dependencies and context of words in a text.",,,,2023-02-25 21:18:38.087039
|
32 |
,,,,2023-02-25 21:18:40.419138
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
in the text, whereas these neural approaches can capture long-term
|
31 |
semantic dependencies and context of words in a text.",,,,2023-02-25 21:18:38.087039
|
32 |
,,,,2023-02-25 21:18:40.419138
|
33 |
+
,,,,2023-03-25 16:14:25.639391
|
34 |
+
,,,,2023-03-25 16:14:26.687979
|
35 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:31.944106
|
36 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:32.552069
|
37 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:33.042101
|
38 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:14:33.371218
|
39 |
+
,,,,2023-03-25 16:22:32.147004
|
40 |
+
,,,,2023-03-25 16:22:41.666675
|
41 |
+
,,,,2023-03-25 16:22:42.128605
|
42 |
+
,,,,2023-03-25 16:22:42.314580
|
43 |
+
,,,,2023-03-25 16:22:42.474549
|
44 |
+
,,,,2023-03-25 16:22:42.639555
|
45 |
+
,,,,2023-03-25 16:22:42.830877
|
46 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:26:57.282664
|
47 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:40.137523
|
48 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:40.939901
|
49 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:41.198962
|
50 |
+
Where is the Eiffel Tower?,"champ de mars in paris , france",,,2023-03-25 16:27:41.365459
|