Create streamlit_app.py
Browse files- streamlit_app.py +125 -0
streamlit_app.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import string
|
3 |
+
import random
|
4 |
+
import nltk
|
5 |
+
import numpy as np
|
6 |
+
import tensorflow as tf
|
7 |
+
from nltk.stem import WordNetLemmatizer
|
8 |
+
from tensorflow.keras import Sequential
|
9 |
+
from tensorflow.keras.layers import Dense, Dropout
|
10 |
+
from tensorflow.keras.optimizers.schedules import ExponentialDecay
|
11 |
+
import streamlit as st
|
12 |
+
|
13 |
+
# Download the NLTK resources used below.
# - "punkt" / "punkt_tab": tokenizer models for nltk.word_tokenize. Recent
#   NLTK releases look up "punkt_tab" while older ones use "punkt", so both
#   are requested to work across versions.
# - "wordnet": data for WordNetLemmatizer.
# quiet=True keeps the download log out of the output; Streamlit re-executes
# this script on every interaction, so these calls run often.
for _resource in ("punkt", "punkt_tab", "wordnet"):
    nltk.download(_resource, quiet=True)
|
16 |
+
|
17 |
+
# Load the intents JSON file.
# The encoding is passed explicitly: without it open() falls back to the
# platform's locale encoding, which can fail or mis-decode non-ASCII
# patterns/responses on systems whose default is not UTF-8.
with open("intents.json", "r", encoding="utf-8") as file:
    data = json.load(file)
|
20 |
+
|
21 |
+
# Preprocess and prepare data
lemmatizer = WordNetLemmatizer()
words = []    # every token seen in any pattern; deduplicated further down
classes = []  # intent tags
data_X = []   # raw pattern strings
data_Y = []   # tag of the pattern stored at the same index in data_X

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        words.extend(nltk.word_tokenize(pattern))
        data_X.append(pattern)
        data_Y.append(intent["tag"])
    if intent["tag"] not in classes:
        classes.append(intent["tag"])

# Build the vocabulary: lower-cased lemmas, without punctuation tokens.
# A token is dropped when *every* character is punctuation. Testing
# `word not in string.punctuation` would be a substring check, which lets
# multi-character punctuation tokens such as "..." slip into the vocabulary.
_punctuation_chars = set(string.punctuation)
words = sorted({
    lemmatizer.lemmatize(word.lower())
    for word in words
    if not all(char in _punctuation_chars for char in word)
})
classes = sorted(set(classes))
|
40 |
+
|
41 |
+
# Bag of Words model: one [features, one-hot label] pair per pattern.
training = []
out_empty = [0] * len(classes)

for doc, tag in zip(data_X, data_Y):
    # Tokenize and lemmatize the pattern word by word, the same way the
    # vocabulary is built and the same way bag_of_words() encodes user input.
    # Lemmatizing the whole sentence as one string and using `word in text`
    # would be substring matching (e.g. "a" matches inside "what"), giving
    # the model training features it never sees at prediction time.
    doc_tokens = {
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(doc)
    }
    bow = [1 if word in doc_tokens else 0 for word in words]

    output_row = list(out_empty)
    output_row[classes.index(tag)] = 1
    training.append([bow, output_row])

# Shuffle the pairs together so features and labels stay aligned.
random.shuffle(training)
training = np.array(training, dtype=object)
train_X = np.array(list(training[:, 0]), dtype=np.float32)
train_Y = np.array(list(training[:, 1]), dtype=np.float32)
|
58 |
+
|
59 |
+
# Learning-rate schedule handed to the Adam optimizer below: exponential
# decay starting from `initial_learning_rate`, configured with a decay rate
# of 0.9 per 1000 steps and staircase mode enabled.
initial_learning_rate = 0.01
lr_schedule = ExponentialDecay(
    initial_learning_rate,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True,
)
|
67 |
+
|
68 |
+
# Build and train the neural network model.
#
# Streamlit re-executes this whole script on every user interaction, so
# training at module level would repeat all 150 epochs for each message.
# st.cache_resource keeps the trained model alive across reruns.
@st.cache_resource
def _train_model(intents_fingerprint, _features, _labels):
    """Build, compile and fit the intent classifier.

    Args:
        intents_fingerprint: Hashable value identifying the training data.
            It is the only argument Streamlit hashes, so the model is
            retrained only when it changes.
        _features: 2-D float array of bag-of-words rows. The leading
            underscore tells Streamlit not to hash it; the rows are shuffled
            randomly on each run, so hashing them would miss the cache.
        _labels: 2-D float array of one-hot intent labels, row-aligned with
            ``_features``. Not hashed, for the same reason.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    network = Sequential([
        Dense(128, input_shape=(len(_features[0]),), activation="relu"),
        Dropout(0.5),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(len(_labels[0]), activation="softmax"),
    ])

    adam = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    network.compile(loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"])
    network.fit(x=_features, y=_labels, epochs=150, verbose=1)
    return network


# The serialized intents act as the cache key: editing intents.json changes
# the key and triggers a retrain, otherwise the cached model is reused.
model = _train_model(json.dumps(data, sort_keys=True), train_X, train_Y)
|
80 |
+
|
81 |
+
# Define the helper functions
|
82 |
+
def clean_text(text):
    """Tokenize *text* and normalize each token for vocabulary lookup.

    Tokens are lower-cased before lemmatization because the module-level
    vocabulary (``words``) is built from lower-cased lemmas; without this,
    capitalized input such as "Hello" would never match a vocabulary entry.

    Args:
        text: Raw user message.

    Returns:
        A list of lower-cased, lemmatized tokens. A list is returned rather
        than a generator so the result can be iterated more than once.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(text)]
|
86 |
+
|
87 |
+
def bag_of_words(text, vocab):
    """Encode *text* as a binary bag-of-words vector over *vocab*.

    Args:
        text: Raw user message; it is tokenized with ``clean_text``.
        vocab: Sequence of vocabulary words. Position ``i`` of the result
            corresponds to ``vocab[i]``.

    Returns:
        A ``float32`` NumPy array of length ``len(vocab)`` holding 1.0 at the
        position of every vocabulary word present in *text*, 0.0 elsewhere.
    """
    # Map each word to its first position once, instead of scanning the
    # vocabulary twice (``in`` + ``index``) for every token. setdefault keeps
    # the first occurrence, matching what list.index would return.
    positions = {}
    for position, word in enumerate(vocab):
        positions.setdefault(word, position)

    bow = [0] * len(vocab)
    for token in clean_text(text):
        position = positions.get(token)
        if position is not None:
            bow[position] = 1
    return np.array(bow, dtype=np.float32)
|
94 |
+
|
95 |
+
def pred_class(text, vocab, labels, thresh=0.5):
    """Predict the intent labels for *text*, most probable first.

    Args:
        text: Raw user message.
        vocab: Vocabulary used to encode the message (see ``bag_of_words``).
        labels: Sequence of intent labels, indexed like the model's outputs.
        thresh: Minimum score (exclusive) an output must reach to be
            returned. Defaults to 0.5, the value previously hard-coded.

    Returns:
        A list of labels whose score exceeds *thresh*, sorted by descending
        score. The list is empty when no output clears the threshold.
    """
    bow = bag_of_words(text, vocab)
    # The model expects a batch, so wrap the single vector and unwrap the
    # single row of scores that comes back.
    result = model.predict(np.array([bow]))[0]
    scored = [(index, score) for index, score in enumerate(result) if score > thresh]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [labels[index] for index, _ in scored]
|
103 |
+
|
104 |
+
def get_response(intents_list, intents_json):
    """Pick a reply for the top-ranked predicted intent.

    Args:
        intents_list: Predicted intent tags, best first. Only the first
            entry is used.
        intents_json: Parsed intents data: a dict whose ``"intents"`` entry
            is a list of dicts with ``"tag"`` and ``"responses"`` keys.

    Returns:
        A randomly chosen response of the intent whose tag equals
        ``intents_list[0]``. The fallback message is returned when the list
        is empty, when no intent carries that tag, or when the matching
        intent has no responses, so the caller always receives a string.
    """
    fallback = "Sorry! I didn't understand."
    if not intents_list:
        return fallback

    tag = intents_list[0]
    for intent in intents_json["intents"]:
        # Skip a matching intent with no responses: random.choice would
        # raise IndexError on an empty list.
        if intent["tag"] == tag and intent["responses"]:
            return random.choice(intent["responses"])
    return fallback
|
113 |
+
|
114 |
+
def chatbot_response(message):
    """Return the bot's reply to *message*.

    The message is classified with ``pred_class`` against the module-level
    vocabulary (``words``) and intent tags (``classes``); the predicted tags
    are then passed to ``get_response`` together with the loaded intents
    (``data``).
    """
    return get_response(pred_class(message, words, classes), data)
|
118 |
+
|
119 |
+
# Streamlit app interface
st.title("ChatBot")
user_input = st.text_input("You:")

# An empty text box yields a falsy value, so no reply is shown until the
# user has typed something.
if user_input:
    response = chatbot_response(user_input)
    st.text_area("ChatBot:", value=response, height=100)
|