Working Questgen implemented
Browse files- .gitignore +3 -1
- app.py +54 -19
- requirements.txt +4 -1
.gitignore
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
venv
|
2 |
.vscode
|
3 |
s2v_reddit_2015_md.tar.gz
|
4 |
-
__pycache__
|
|
|
|
|
|
1 |
venv
|
2 |
.vscode
|
3 |
s2v_reddit_2015_md.tar.gz
|
4 |
+
__pycache__
|
5 |
+
s2v_old
|
6 |
+
._s2v_old
|
app.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import time
|
2 |
import gradio as gr
|
3 |
from transformers import AutoTokenizer
|
@@ -5,6 +7,19 @@ import os
|
|
5 |
from pathlib import Path
|
6 |
from FastT5 import get_onnx_runtime_sessions, OnnxT5
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
trained_model_path = './t5_squad_v1/'
|
10 |
|
@@ -42,25 +57,10 @@ def get_question(sentence, answer, mdl, tknizer):
|
|
42 |
dec = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]
|
43 |
|
44 |
Question = dec[0].replace("question:", "")
|
45 |
-
|
46 |
return Question
|
47 |
|
48 |
|
49 |
-
# context = "Ramsri loves to watch cricket during his free time"
|
50 |
-
# answer = "cricket"
|
51 |
-
context = "Donald Trump is an American media personality and businessman who served as the 45th president of the United States."
|
52 |
-
answer = "Donald Trump"
|
53 |
-
ques = get_question(context, answer, model, tokenizer)
|
54 |
-
print("question: ", ques)
|
55 |
-
|
56 |
-
|
57 |
-
context = gr.components.Textbox(
|
58 |
-
lines=5, placeholder="Enter paragraph/context here...")
|
59 |
-
answer = gr.components.Textbox(
|
60 |
-
lines=3, placeholder="Enter answer/keyword here...")
|
61 |
-
question = gr.components.Textbox(type="text", label="Question")
|
62 |
-
|
63 |
-
|
64 |
def generate_question(context, answer):
|
65 |
start_time = time.time() # Record the start time
|
66 |
result = get_question(context, answer, model, tokenizer)
|
@@ -70,10 +70,45 @@ def generate_question(context, answer):
|
|
70 |
return result
|
71 |
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
iface = gr.Interface(
|
74 |
-
fn=
|
75 |
-
inputs=
|
76 |
-
outputs=
|
|
|
|
|
77 |
)
|
78 |
|
79 |
iface.launch()
|
|
|
1 |
+
import pke
|
2 |
+
from sense2vec import Sense2Vec
|
3 |
import time
|
4 |
import gradio as gr
|
5 |
from transformers import AutoTokenizer
|
|
|
7 |
from pathlib import Path
|
8 |
from FastT5 import get_onnx_runtime_sessions, OnnxT5
|
9 |
|
10 |
+
# --- One-time setup: fetch and unpack the sense2vec vectors -----------------
# Bug fix: previously the ~600 MB archive was downloaded and extracted on
# EVERY launch. Skip the whole step when the extracted model directory is
# already on disk. (Assumes the tarball unpacks to "s2v_old" — the path
# loaded below; TODO confirm against the release archive layout.)
if not Path("s2v_old").exists():
    commands = [
        "curl -LO https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz",
        "tar -xvf s2v_reddit_2015_md.tar.gz",
    ]
    for command in commands:
        # NOTE(review): os.system goes through the shell; acceptable for these
        # fixed strings, but subprocess.run([...]) would be safer if any part
        # ever becomes user-influenced.
        return_code = os.system(command)
        if return_code == 0:
            print(f"Command '{command}' executed successfully")
        else:
            print(f"Command '{command}' failed with return code {return_code}")

# sense2vec model used to mine distractor words for the MCQs.
s2v = Sense2Vec().from_disk("s2v_old")
|
23 |
|
24 |
trained_model_path = './t5_squad_v1/'
|
25 |
|
|
|
57 |
dec = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]
|
58 |
|
59 |
Question = dec[0].replace("question:", "")
|
60 |
+
Question = Question.strip()
|
61 |
return Question
|
62 |
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def generate_question(context, answer):
|
65 |
start_time = time.time() # Record the start time
|
66 |
result = get_question(context, answer, model, tokenizer)
|
|
|
70 |
return result
|
71 |
|
72 |
|
73 |
+
def generate_mcq(context):
    """Build MCQ items (question, keyword, two distractors) from *context*.

    The top 10 keyphrases are extracted with pke's TopicRank; for each
    keyphrase that sense2vec knows, a question is generated and the two
    most-similar sense2vec terms become distractors.

    Returns a list of dicts with keys "Question", "Keyword",
    "Distractor1" and "Distractor2". Keyphrases with no sense2vec sense,
    or fewer than two similar terms, are skipped.
    """
    extractor = pke.unsupervised.TopicRank()
    extractor.load_document(input=context, language='en')
    # Restrict candidates to content words so keyphrases are answer-like.
    extractor.candidate_selection(pos={"NOUN", "PROPN", "ADJ"})
    extractor.candidate_weighting()
    keyphrases = extractor.get_n_best(n=10)

    results = []

    for keyword, _ in keyphrases:
        original_keyword = keyword
        # sense2vec keys are lowercase with underscores (e.g. "new_york").
        keyword = original_keyword.lower().replace(" ", "_")
        sense = s2v.get_best_sense(keyword)

        if sense is None:
            continue

        most_similar = s2v.most_similar(sense, n=2)
        # Entries look like "word|SENSE"; keep the surface form only.
        distractors = [word.split("|")[0].lower().replace("_", " ")
                       for word, _ in most_similar]
        # Bug fix: most_similar can return fewer than two entries, which
        # previously raised IndexError on distractors[1]. Skip instead.
        if len(distractors) < 2:
            continue

        question = generate_question(context, original_keyword)

        results.append({
            "Question": question,
            "Keyword": original_keyword,
            "Distractor1": distractors[0],
            "Distractor2": distractors[1],
        })

    return results
|
104 |
+
|
105 |
+
|
106 |
# Gradio UI: one context textbox in, JSON list of MCQ dicts out.
iface = gr.Interface(
    fn=generate_mcq,
    inputs=gr.Textbox(label="Context", type='text'),
    # Bug fix: `gr.JSON(value=list)` passed the builtin list *class* as the
    # component's initial value; a JSON output needs no default — label it.
    outputs=gr.JSON(label="MCQs"),
    title="Questgen AI",
    description="Enter a context to generate MCQs for keywords.",
)

iface.launch()
|
requirements.txt
CHANGED
@@ -5,4 +5,7 @@ torch
|
|
5 |
transformers
|
6 |
sentencepiece
|
7 |
progress
|
8 |
-
psutil
|
|
|
|
|
|
|
|
5 |
transformers
|
6 |
sentencepiece
|
7 |
progress
|
8 |
+
psutil
|
9 |
+
sense2vec
|
10 |
+
git+https://github.com/boudinfl/pke.git
|
11 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl
|