import json
import random
import string

import nltk
import numpy as np
import streamlit as st
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers.schedules import ExponentialDecay

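# Tokenizer ("punkt") and lemmatizer ("wordnet") data used by nltk.word_tokenize and WordNetLemmatizer.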
nltk.download("punkt")
nltk.download("wordnet")

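# Load the intent definitions: each intent has a tag, example patterns, and canned responses.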
with open('intents.json', 'r') as file:
    data = json.load(file)

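# Build the vocabulary (words), the list of intent tags (classes), and the per-pattern
# training documents (data_X) with their tags (data_Y).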
lemmatizer = WordNetLemmatizer()
words = []
classes = []
data_X = []
data_Y = []

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        data_X.append(pattern)
        data_Y.append(intent["tag"])
    if intent["tag"] not in classes:
        classes.append(intent["tag"])

words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation]
words = sorted(set(words))
classes = sorted(set(classes))

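# Turn each pattern into a bag-of-words vector and one-hot encode its intent tag.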
training = []
out_empty = [0] * len(classes)

for idx, doc in enumerate(data_X):
    # Lemmatize the pattern's tokens so they are comparable to the vocabulary entries;
    # checking membership in a token list avoids the spurious substring matches that
    # a plain "word in text" test on the raw string would produce.
    doc_tokens = [lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)]
    bow = [1 if word in doc_tokens else 0 for word in words]
    # One-hot encode the pattern's intent tag.
    output_row = list(out_empty)
    output_row[classes.index(data_Y[idx])] = 1
    training.append([bow, output_row])

random.shuffle(training)
training = np.array(training, dtype=object)
train_X = np.array(list(training[:, 0]), dtype=np.float32)
train_Y = np.array(list(training[:, 1]), dtype=np.float32)

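# Learning-rate schedule: start at 0.01 and multiply by 0.9 every 1000 steps (staircase decay).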
initial_learning_rate = 0.01
lr_schedule = ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True,
)

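# Feed-forward classifier: bag-of-words input -> two ReLU layers with dropout -> softmax over intent tags.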
model = Sequential([
    Dense(128, input_shape=(len(train_X[0]),), activation="relu"),
    Dropout(0.5),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(len(train_Y[0]), activation="softmax"),
])

adam = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])
model.fit(x=train_X, y=train_Y, epochs=150, verbose=1)

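# NOTE: Streamlit re-runs this script on every interaction, so the model above is retrained
# for each message; one common refactor (not shown here) is to wrap the data prep and
# training in a function cached with st.cache_resource.
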
def clean_text(text):
    # Tokenize and lemmatize user input the same way the training vocabulary was built,
    # including lowercasing so tokens actually match the lowercased vocabulary.
    tokens = nltk.word_tokenize(text)
    tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens]
    return tokens

def bag_of_words(text, vocab):
    tokens = clean_text(text)
    bow = [0] * len(vocab)
    for w in tokens:
        if w in vocab:
            bow[vocab.index(w)] = 1
    return np.array(bow, dtype=np.float32)

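# Return the intent tags whose predicted probability exceeds the 0.5 threshold, most confident first.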
def pred_class(text, vocab, labels):
    bow = bag_of_words(text, vocab)
    result = model.predict(np.array([bow]))[0]
    thresh = 0.5
    y_pred = [[indx, res] for indx, res in enumerate(result) if res > thresh]
    y_pred.sort(key=lambda x: x[1], reverse=True)
    return_list = [labels[r[0]] for r in y_pred]
    return return_list

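# Pick a random canned response for the top predicted tag; apologise when nothing clears the threshold.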
def get_response(intents_list, intents_json):
    if len(intents_list) == 0:
        return "Sorry! I didn't understand."
    else:
        tag = intents_list[0]
        list_of_intents = intents_json["intents"]
        for i in list_of_intents:
            if i["tag"] == tag:
                return random.choice(i["responses"])

def chatbot_response(message):
    intents = pred_class(message, words, classes)
    result = get_response(intents, data)
    return result

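# Minimal Streamlit UI: read a message, run the classifier, and show the response.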
st.title("ChatBot")
user_input = st.text_input("You:")

if user_input:
    response = chatbot_response(user_input)
    st.text_area("ChatBot:", value=response, height=100)