Question-gen / app.py
Vaibhavbrkn's picture
Update app.py
6f63374
raw
history blame contribute delete
No virus
3.47 kB
import gradio as gr
import numpy as np
from keybert import KeyBERT
import random
from transformers import (
T5ForConditionalGeneration,
T5Tokenizer,
)
import re
import transformers
import torch
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MAX_LEN = 512
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained(
'Vaibhavbrkn/question-gen')
mod = KeyBERT('distilbert-base-nli-mean-tokens')
model.to(DEVICE)
context = "The Transgender Persons Bill, 2016 was hurriedly passed in the Lok Sabha, amid much outcry from the very community it claims to protect."
def filter_keyword(data, ran=5):
ap = []
real = []
res = re.sub(r'-', ' ', data)
res = re.sub(r'[^\w\s\.\,]', '', res)
for i in range(1, 4):
ap.append(mod.extract_keywords(
res, keyphrase_ngram_range=(1, i), diversity=0.7, top_n=ran*2))
for i in range(3):
for j in range(len(ap[i])):
if ap[i][j][0].lower() in res.lower():
real.append(ap[i][j])
real = sorted(real, key=lambda x: x[1], reverse=True)
ap = []
st = ""
for i in range(len(real)):
if real[i][0] in st:
continue
else:
ap.append(real[i])
st += real[i][0] + " "
if len(ap) == ran:
break
return ap
# FOR BAD label negative or bottom 3
def func(context, slide):
slide = int(slide)
randomness = 0.4
orig = int(np.ceil(randomness * slide))
temp = slide - orig
ap = filter_keyword(context, ran=slide*2)
outputs = []
print(slide)
print(orig)
print(ap)
for i in range(orig):
inputs = "context: " + context + " keyword: " + ap[i][0]
source_tokenizer = tokenizer.encode_plus(
inputs, max_length=512, pad_to_max_length=True, return_tensors="pt")
outs = model.generate(input_ids=source_tokenizer['input_ids'].to(
DEVICE), attention_mask=source_tokenizer['attention_mask'].to(DEVICE), max_length=50)
dec = [tokenizer.decode(ids) for ids in outs][0]
st = dec.replace("<pad> ", "")
st = st.replace("</s>", "")
if ap[i][1] > 0.0:
outputs.append((st, "Good"))
else:
outputs.append((st, "Bad"))
del ap[: orig]
print("first",outputs)
print(temp)
if temp > 0:
for i in range(temp):
keyword = random.choice(ap)
inputs = "context: " + context + " keyword: " + keyword[0]
source_tokenizer = tokenizer.encode_plus(
inputs, max_length=512, pad_to_max_length=True, return_tensors="pt")
outs = model.generate(input_ids=source_tokenizer['input_ids'].to(
DEVICE), attention_mask=source_tokenizer['attention_mask'].to(DEVICE), max_length=50)
dec = [tokenizer.decode(ids) for ids in outs][0]
st = dec.replace("<pad> ", "")
st = st.replace("</s>", "")
if keyword[1] > 0.0:
outputs.append((st, "Good"))
else:
outputs.append((st, "Bad"))
print("second",outputs)
return outputs
gr.Interface(func,
[
gr.inputs.Textbox(lines=10, label="context"),
gr.inputs.Slider(minimum=1, maximum=5,
default=1, label="No of Question"),
],
gr.outputs.KeyValues()).launch()