Spaces:
Runtime error
Runtime error
import pyjokes | |
import gradio as gr | |
import numpy as np | |
from nltk.corpus import wordnet as wn | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
import nltk | |
nltk.download('all') | |
import string | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
# import fastai | |
def similarity(input, joke): | |
return cosine_similarity(input, joke) | |
def get_best(input): | |
model = SentenceTransformer('bert-base-nli-mean-tokens') | |
max_similarity = -1 | |
max_idx = 0 | |
jokes = pyjokes.get_jokes(language='en', category='all') | |
jokes_embedding = model.encode(jokes) | |
input_embedding = model.encode(input) | |
for idx, joke_embedding in enumerate(jokes_embedding): | |
sim = similarity(joke_embedding.reshape(-1, 1), | |
input_embedding.reshape(-1, 1)) | |
if(np.sum(sim) > np.sum(max_similarity)): | |
max_idx = idx | |
max_similarity = sim | |
if(np.sum(max_similarity) != -1): | |
return jokes[max_idx]+'ππ€£' | |
else: | |
return None | |
def generate_list(input): | |
result = [] | |
n = len(input) | |
for Len in range(2, n + 1): | |
for i in range(n - Len + 1): | |
j = i + Len - 1 | |
tem = "" | |
for k in range(i, j + 1): | |
tem += input[k] | |
result.append(tem) | |
return result | |
def pattern(input): | |
response = input | |
for substr in generate_list(input): | |
try: | |
syn = wn.synsets(substr)[1].hypernyms()[0].hyponyms()[ | |
0].hyponyms()[0].lemmas()[0].name() | |
except: | |
continue | |
if(syn != None): | |
response = response.replace(substr, syn.upper()) | |
break | |
if(input == response): | |
return None | |
else: | |
return response+'??ππ€£' | |
lemmer = nltk.stem.WordNetLemmatizer() | |
def LemTokens(tokens): | |
return [lemmer.lemmatize(token) for token in tokens] | |
remove_punct_dict= dict((ord(punct), None) for punct in string.punctuation) | |
def LemNormalize(text): | |
return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict))) | |
def NLTK(input): | |
f = open('corpus.txt', errors='strict') | |
data = f.read() | |
data = data.lower() | |
sent_tokens = nltk.sent_tokenize(data) | |
return bot(sent_tokens) | |
def bot(sent_tokens): | |
robo1_response = '' | |
TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words='english') | |
tfidf = TfidfVec.fit_transform(sent_tokens) | |
vals = cosine_similarity(tfidf[-1], tfidf) | |
idx = vals.argsort()[0][-2] | |
flat = vals.flatten() | |
flat.sort() | |
req_tfidf = flat[-2] | |
if (req_tfidf == 0): | |
robo1_response= robo1_response+"I could not answer this right now but you can contact the head of our dept (PUSPHA RAJ)." # add the dept recommendation engine and contact details | |
return robo1_response | |
else: | |
robo1_response = robo1_response+sent_tokens[idx] | |
return robo1_response | |
def generator(input=None): | |
response = [] | |
if input: | |
out1 = NLTK(input) | |
if(out1): | |
response.append(out) | |
out2 = pattern(input) | |
if(out2): | |
response.append(out2) | |
out3 = get_best(input) | |
if(out3): | |
response.append(out3) | |
else: | |
out1 = NLTK("Hi, what's the matter") | |
if(out1): | |
for out in out1: | |
response.append(out) | |
out2 = pyjokes.get_joke(language='en', category='all') | |
if(out2): | |
response.append(out2) | |
return response # think of doing this | |
iface = gr.Interface(fn=generator, inputs="text", outputs="text") | |
iface.launch() | |