import json
import string
import random

import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers.schedules import ExponentialDecay
import streamlit as st

# Download the NLTK resources needed for tokenization and lemmatization
nltk.download("punkt")
nltk.download("wordnet")

# Load the intents JSON file
with open("intents.json", "r") as file:
    data = json.load(file)

# Preprocess and prepare the training data
lemmatizer = WordNetLemmatizer()
words = []      # vocabulary of lemmatized tokens
classes = []    # intent tags
data_X = []     # training patterns
data_Y = []     # tag for each pattern

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        data_X.append(pattern)
        data_Y.append(intent["tag"])
    if intent["tag"] not in classes:
        classes.append(intent["tag"])

# Lemmatize, lowercase, and deduplicate the vocabulary; drop punctuation
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation]
words = sorted(set(words))
classes = sorted(set(classes))

# Build the bag-of-words training set
training = []
out_empty = [0] * len(classes)
for idx, doc in enumerate(data_X):
    # Tokenize and lemmatize each pattern the same way the vocabulary was built,
    # then mark which vocabulary words appear in it
    tokens = [lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)]
    bow = [1 if word in tokens else 0 for word in words]
    # One-hot encode the intent tag
    output_row = list(out_empty)
    output_row[classes.index(data_Y[idx])] = 1
    training.append([bow, output_row])

random.shuffle(training)
training = np.array(training, dtype=object)
train_X = np.array(list(training[:, 0]), dtype=np.float32)
train_Y = np.array(list(training[:, 1]), dtype=np.float32)

# Define the learning-rate schedule
initial_learning_rate = 0.01
lr_schedule = ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True,
)

# Build and train the neural network model
# NOTE: Streamlit re-executes this script on every interaction, so the model is
# retrained on each rerun; caching the trained model (e.g. with st.cache_resource)
# avoids that in a real deployment.
model = Sequential([
    Dense(128, input_shape=(len(train_X[0]),), activation="relu"),
    Dropout(0.5),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(len(train_Y[0]), activation="softmax"),
])
adam = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
model.compile(loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"])
model.fit(x=train_X, y=train_Y, epochs=150, verbose=1)

# Helper functions
def clean_text(text):
    """Tokenize, lowercase, and lemmatize user input to match the vocabulary."""
    tokens = nltk.word_tokenize(text)
    return [lemmatizer.lemmatize(word.lower()) for word in tokens]

def bag_of_words(text, vocab):
    """Convert text into a binary bag-of-words vector over the vocabulary."""
    tokens = clean_text(text)
    bow = [0] * len(vocab)
    for w in tokens:
        if w in vocab:
            bow[vocab.index(w)] = 1
    return np.array(bow, dtype=np.float32)

def pred_class(text, vocab, labels):
    """Return the intent tags whose predicted probability exceeds the threshold."""
    bow = bag_of_words(text, vocab)
    result = model.predict(np.array([bow]))[0]
    thresh = 0.5
    y_pred = [[idx, res] for idx, res in enumerate(result) if res > thresh]
    y_pred.sort(key=lambda x: x[1], reverse=True)
    return [labels[r[0]] for r in y_pred]

def get_response(intents_list, intents_json):
    """Pick a random response for the top predicted intent."""
    if len(intents_list) == 0:
        return "Sorry! I didn't understand."
    tag = intents_list[0]
    for intent in intents_json["intents"]:
        if intent["tag"] == tag:
            return random.choice(intent["responses"])
    return "Sorry! I didn't understand."

def chatbot_response(message):
    intents = pred_class(message, words, classes)
    return get_response(intents, data)

# Streamlit app interface
st.title("ChatBot")
user_input = st.text_input("You:")
if user_input:
    response = chatbot_response(user_input)
    st.text_area("ChatBot:", value=response, height=100)
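
# ---------------------------------------------------------------------------
# Expected intents.json structure (illustrative sketch, not part of the script):
# the loader and get_response() above only rely on the "intents", "tag",
# "patterns", and "responses" keys; the "greeting" entry below is a placeholder.
#
# {
#   "intents": [
#     {
#       "tag": "greeting",
#       "patterns": ["Hi", "Hello", "How are you?"],
#       "responses": ["Hello!", "Hi there, how can I help?"]
#     }
#   ]
# }
#
# Launch the app from the terminal with:  streamlit run <script_name>.py
# ---------------------------------------------------------------------------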