rohan13 committed on
Commit
2376116
·
1 Parent(s): 518a400

- removed streamlit (63c9094e90c4a6e1b2a4dfa7a25d90ed0028856e)
- chatbot widget with flask, socket, html, js, css (a2b71cf094e99b79543d5cfd30c6a45cdb608609)
- git ignore (07de00b304e66f34dfe7a47ad7e6ce8b711b588f)
- added index file (964b90ef783e63834ccd6bfc17087dd74d3177b2)

.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ *.index filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ env/
2
+ .idea
README.md CHANGED
@@ -1,12 +1 @@
1
- ---
2
- title: Makerlab Bot
3
- emoji: 🌍
4
- colorFrom: purple
5
- colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.17.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # makerlab-bot
 
 
 
 
 
 
 
 
 
 
 
__pycache__/app.cpython-39.pyc ADDED
Binary file (776 Bytes). View file
 
__pycache__/main.cpython-39.pyc ADDED
Binary file (1.14 kB). View file
 
__pycache__/utils.cpython-39.pyc ADDED
Binary file (4.14 kB). View file
 
app.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import secrets

from flask import Flask, render_template
from flask_socketio import SocketIO, emit
from main import run

# Flask application plus the Socket.IO layer the chat widget talks to.
app = Flask(__name__)
# A signing key must never be a literal committed to the repository: take it
# from the SECRET_KEY environment variable, and fall back to a random
# per-process key so a local run without configuration still starts.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or secrets.token_hex(32)
socketio = SocketIO(app)
8
+
9
+
10
@app.route('/')
def index():
    """Serve the page that hosts the chat widget."""
    page = render_template('index.html')
    return page
13
+
14
+
15
@socketio.on('message')
def handle_message(data):
    """Answer one question sent by the chat widget.

    Args:
        data: Payload of the ``message`` event; expected to be a dict with a
            string under the ``'question'`` key.

    Emits a ``response`` event whose ``'response'`` field carries the answer,
    or a short explanatory message when the payload is unusable or answering
    fails, so the client always gets a reply.
    """
    # The payload comes straight from the browser, so do not trust its shape.
    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        emit('response', {'response': 'Please enter a question.'})
        return

    print("question: " + question)
    try:
        response = run(question)
    except Exception:
        # Boundary handler: log the full traceback server-side, but keep the
        # socket conversation alive instead of leaving the client waiting.
        app.logger.exception("failed to answer question")
        response = 'Sorry, something went wrong while answering your question.'
    emit('response', {'response': response})
21
+
22
+
23
# Script entry point: start the Socket.IO-aware development server.
if __name__ == "__main__":
    socketio.run(app)
main.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
2
+ from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain.llms import OpenAI
4
+
5
+ from utils import generate_answer
6
+ from utils import get_search_index
7
+
8
# On-disk locations of the persisted search index: the pickled store and the
# raw FAISS index file.
open_ai_pkl = "open_ai.pkl"
open_ai_index = "open_ai.index"

# Language model used to answer questions; temperature is fixed at 0.
gpt_3_5 = OpenAI(temperature=0, model_name='gpt-3.5-turbo')

# Embedding model passed to get_search_index.
open_ai_embeddings = OpenAIEmbeddings()
14
+
15
# Lazily created and then reused across questions; see run().
_gpt_3_5_index = None
_gpt_3_5_chain = None


def run(question):
    """Answer ``question`` using the persisted search index and the QA chain.

    The search index and the question-answering chain are created on the
    first call and reused afterwards, so the index file is not unpickled
    again (and the chain not rebuilt) for every incoming question.

    Args:
        question: The user's question as plain text.

    Returns:
        The ``output_text`` produced by the chain for this question.
    """
    global _gpt_3_5_index, _gpt_3_5_chain

    if _gpt_3_5_index is None:
        _gpt_3_5_index = get_search_index(open_ai_pkl, open_ai_index, open_ai_embeddings)

    if _gpt_3_5_chain is None:
        _gpt_3_5_chain = load_qa_with_sources_chain(gpt_3_5, chain_type="stuff", verbose=True)

    return generate_answer(_gpt_3_5_chain, _gpt_3_5_index, question)
open_ai.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d25013b3bf7b7195d01ec1cc9ac1527638d8db68d94556b3dcc69b7dd8ff704
3
+ size 3016749
open_ai.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c903018b7b3ad6f89b802f0e36fc92c88fb793c4f6e2499687b8823050a4df0
3
+ size 3373815
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ faiss-cpu
2
+ langchain
3
+ beautifulsoup4
4
+ PyPDF2
5
+ openai
6
+ flask
7
+ flask-socketio
static/chatbot.js ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
$(document).ready(function() {
    // Widget elements used throughout the handlers below
    var $messages = $('.chat-messages');
    var $input = $('.chat-input input');
    var $submit = $('.chat_submit');
    // Capturing group so String.split() keeps the URLs in its result
    var URL_PATTERN = /(https?:\/\/[^\s]+)/;

    $messages.children().each(function() {
        $(this).addClass('chat-message');
    });

    // Connect to the origin that served the page. Building the URL by hand
    // with a fixed 'http://' breaks on HTTPS pages and when the port is empty.
    var socket = io();

    // Send a question to the Flask-SocketIO app
    function sendMessage(message) {
        console.log("message: " + message);
        socket.emit('message', {'question': message});
    }

    // Build the <p> for a message. The text is inserted as text nodes and the
    // links as real <a> elements, so markup typed by the user or returned by
    // the bot is never interpreted as HTML.
    function buildMessageText(message) {
        var $messageText = $('<p>');
        var text = (message === undefined || message === null) ? '' : String(message);
        // With a capturing group, odd indices of the split result are URLs
        text.split(URL_PATTERN).forEach(function(part, position) {
            if (position % 2 === 1) {
                $messageText.append($('<a>').attr('href', part).text(part));
            } else if (part !== '') {
                $messageText.append(document.createTextNode(part));
            }
        });
        return $messageText;
    }

    // Append a message bubble; isUser selects the user or bot styling
    function displayMessage(message, isUser) {
        var $message = $('<div>').addClass('chat-message round');
        $message.append(buildMessageText(message));
        $message.addClass(isUser ? 'user' : 'bot');

        // A jQuery object is always truthy, so test whether it matched anything
        if ($messages.length) {
            $messages.append($message);
            $messages.animate({scrollTop: $messages[0].scrollHeight}, 300);
        } else {
            $('.chat-container').append($message);
            $('.chat-container').animate({scrollTop: 0}, 300);
        }
    }

    // Show answers coming back from the server
    socket.on('response', function(data) {
        var response = data ? data.response : undefined;
        console.log("Received response: " + response);
        displayMessage(response, false);
    });

    // Send message on submit
    $submit.click(function(event) {
        event.preventDefault();
        var message = $input.val().trim();
        console.log("Submit clicked: " + message);
        if (message !== '') {
            displayMessage(message, true);
            sendMessage(message);
            $input.val('');
        }
    });

    // Send message on enter key press
    $input.keydown(function(event) {
        if (event.keyCode === 13) {
            event.preventDefault();
            $submit.click();
        }
    });

    // Initial message
    displayMessage('Ask me anything', false);
});
static/style.css ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .chat-container {
2
+ position: fixed;
3
+ bottom: 30px;
4
+ right: 30px;
5
+ z-index: 999;
6
+ background-color: #fff;
7
+ border-radius: 10px;
8
+ box-shadow: 0px 0px 20px rgba(0, 0, 0, 0.2);
9
+ max-width: 400px;
10
+ min-width: 300px;
11
+ }
12
+
13
+ .round {
14
+ border-radius: 10px;
15
+ -webkit-border-radius: 10px;
16
+ -moz-border-radius: 30px;
17
+
18
+ }
19
+
20
+ .chat-header {
21
+ display: flex;
22
+ align-items: center;
23
+ justify-content: space-between;
24
+ padding: 10px;
25
+ background-color: rgb(113, 239, 234);
26
+ color: #fff;
27
+ border-top-left-radius: 10px;
28
+ border-top-right-radius: 10px;
29
+ }
30
+
31
+ .chat-header h4 {
32
+ margin: 0;
33
+ }
34
+
35
+ .chat-close {
36
+ cursor: pointer;
37
+ }
38
+
39
+ .chat-body {
40
+ height: 300px;
41
+ overflow-y: scroll;
42
+ padding: 10px;
43
+ word-wrap: break-word;
44
+ display:flex;
45
+ flex-direction: column;
46
+ }
47
+
48
+ .chat-message {
49
+ margin: 10px;
50
+ }
51
+
52
+ .chat-message p {
53
+ margin: 0;
54
+ padding: 10px;
55
+ font-size: 16px;
56
+ line-height: 1.4;
57
+ position: relative;
58
+ word-wrap: break-word;
59
+ border-radius: 10px;
60
+ }
61
+
62
+ .chat-message.user {
63
+ display: flex;
64
+ align-self: flex-end;
65
+ justify-content: flex-end;
66
+ text-align: right;
67
+ align-items: center;
68
+ background-color: rgba(113, 239, 234, 0.75);
69
+ border-top-right-radius: 0px;
70
+ border-bottom-right-radius: 0px;
71
+ border-bottom-left-radius: 10px;
72
+ word-wrap: break-word;
73
+ color: #000;
74
+ }
75
+
76
+
77
+ .chat-message.bot {
78
+ display: flex;
79
+ align-self: flex-start;
80
+ justify-content: flex-start;
81
+ text-align: left;
82
+ align-items: center;
83
+ background-color: rgba(113, 239, 234, 0.75);
84
+ border-top-left-radius: 0px;
85
+ border-bottom-right-radius: 10px;
86
+ border-bottom-left-radius: 0px;
87
+ word-wrap: break-word;
88
+ }
89
+
90
+ .chat-message.bot p {
91
+ margin: 0;
92
+ padding: 10px;
93
+ font-size: 16px;
94
+ line-height: 1.4;
95
+ position: relative;
96
+ word-wrap: break-word;
97
+ border-radius: 10px;
98
+ overflow-wrap: anywhere;
99
+ }
100
+
101
+ .chat-message.user:after {
102
+ content: "";
103
+ position: relative;
104
+ top: 0;
105
+ right: -15px;
106
+ width: 0;
107
+ height: 0;
108
+ border-top: 15px solid transparent;
109
+ border-bottom: 15px solid transparent;
110
+ border-left: 15px solid #71EFEABF;
111
+ border-top-right-radius: 10px;
112
+ }
113
+
114
+ .chat-message.bot:before {
115
+ content: "";
116
+ position: relative;
117
+ top: 0;
118
+ left: -15px;
119
+ width: 0;
120
+ height: 0;
121
+ border-top: 15px solid transparent;
122
+ border-bottom: 15px solid transparent;
123
+ border-right: 15px solid #71EFEABF;
124
+ border-top-left-radius: 10px;
125
+ }
126
+
127
+
128
+ .chat-input {
129
+ display: flex;
130
+ margin-top: 10px;
131
+ }
132
+
133
+ .chat-input input {
134
+ flex-grow: 1;
135
+ border: none;
136
+ border-radius: 5px;
137
+ padding: 8px 10px;
138
+ font-size: 16px;
139
+ margin-right: 10px;
140
+ box-shadow: 0px 0px 5px rgba(0, 0, 0, 0.1);
141
+ }
142
+
143
+ .chat-input button {
144
+ background-color: #FFA500;
145
+ color: #fff;
146
+ border: none;
147
+ border-radius: 5px;
148
+ padding: 8px 10px;
149
+ font-size: 16px;
150
+ cursor: pointer;
151
+ box-shadow: 0px 0px 5px rgba(0, 0, 0, 0.1);
152
+ }
templates/index.html ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <title>MakerlabX3DPrinting QA</title>
6
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css">
7
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
8
+ <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
9
+ </head>
10
+ <body>
11
+ <div class="chat-container">
12
+ <div class="chat-header">
13
+ <h4>Makerlab Q&A Bot</h4>
14
+ <i class="fa fa-close chat-close"></i>
15
+ </div>
16
+ <div class="chat-body chat-messages round"></div>
17
+ <div class="chat-input">
18
+ <input type="text" placeholder="Type your message">
19
+ <button class="chat_submit">Send</button>
20
+ </div>
21
+ </div>
22
+ <!--<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.5.1/socket.io.js" integrity="sha512-sY2t8W1xNQ2yB+1RFXJv+wwhdN7CHX9Z+fhM7JH/3B3q1x7VJBOwKe+zb7VW0EC8XG5M5rjBQd7+47F5fQlhKQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>-->
23
+ <script src="https://cdn.socket.io/4.4.1/socket.io.min.js"></script>
24
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
25
+ <script src="{{ url_for('static', filename='chatbot.js') }}"></script>
26
+ </body>
27
+ </html>
utils.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import time
4
+ from urllib.parse import urlparse, urljoin
5
+
6
+ import faiss
7
+ import requests
8
+ from PyPDF2 import PdfReader
9
+ from bs4 import BeautifulSoup
10
+ from langchain.docstore.document import Document
11
+ from langchain.text_splitter import CharacterTextSplitter
12
+ from langchain.vectorstores.faiss import FAISS
13
+
14
# Website whose index page is the starting point for collecting page content.
url = 'https://makerlab.illinois.edu/'
# Local PDF that is read page by page.
book_file = "Book.pdf"
# Recorded as the "source" of every document extracted from the PDF.
book_url = 'https://g.co/kgs/2VFC7u'
17
def get_search_index(pickle_file, index_file, embeddings):
    """Return the search index, loading it from disk or building it.

    When ``pickle_file`` and ``index_file`` both exist and the pickle is not
    empty, the index is unpickled from ``pickle_file``. Otherwise it is built
    from freshly created chunk documents with ``embeddings``; the raw FAISS
    index is written to ``index_file`` and the whole object is pickled to
    ``pickle_file``.
    """
    cache_present = (
        os.path.isfile(pickle_file)
        and os.path.isfile(index_file)
        and os.path.getsize(pickle_file) > 0
    )
    if cache_present:
        # NOTE(review): unpickling is only safe for files this project wrote.
        with open(pickle_file, "rb") as cached:
            return pickle.load(cached)

    chunks = create_chunk_documents()
    built_index = search_index_from_docs(chunks, embeddings=embeddings)

    # Persist both representations so the next start can skip the rebuild.
    faiss.write_index(built_index.index, index_file)
    with open(pickle_file, "wb") as out:
        pickle.dump(built_index, out)

    return built_index
35
+
36
+
37
def create_chunk_documents():
    """Fetch all source documents and split them into chunks for embedding.

    Returns:
        A list of chunk documents. Chunks without text are dropped. A chunk
        of 1000 characters or more is passed through the splitter once more
        and replaced by the resulting pieces.
    """
    sources = fetch_data_for_embeddings(url, book_file, book_url)

    splitter = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)

    # Build a new list instead of removing from / extending the list being
    # iterated: that skipped elements, duplicated oversized chunks and could
    # loop forever when a re-split chunk came back just as large.
    source_chunks = []
    for chunk in splitter.split_documents(sources):
        content = chunk.page_content
        if not content:
            print("removing chunk: " + str(content))
            continue

        print("Size of chunk: " + str(len(content) + len(chunk.metadata)))
        if len(content) >= 1000:
            print("splitting document")
            # One extra pass only; the pieces are taken as they come, so the
            # loop always terminates even if the splitter cannot shrink them.
            pieces = splitter.split_documents([chunk])
            source_chunks.extend(piece for piece in pieces if piece.page_content)
        else:
            source_chunks.append(chunk)
    return source_chunks
55
+
56
+
57
def fetch_data_for_embeddings(url, book_file, book_url):
    """Return the website documents followed by the book documents."""
    website_docs = get_website_data(url)
    book_docs = get_document_data(book_file, book_url)
    website_docs.extend(book_docs)
    return website_docs
61
+
62
def get_website_data(index_url):
    """Collect documents for the pages linked from ``index_url``.

    Reads the hrefs found on the index page, resolves and filters them into
    absolute links, then downloads the content behind those links.
    """
    page_paths = get_paths(index_url)
    page_links = get_links(index_url, page_paths)
    documents = get_content_from_links(page_links, index_url)
    return documents
70
+
71
+
72
def get_content_from_links(links, index_url, timeout=10):
    """Download the pages under ``index_url`` and wrap their text in documents.

    Args:
        links: Absolute URLs; duplicates are fetched only once.
        index_url: Only links starting with this prefix are fetched.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        A list of ``Document`` objects, one per successfully fetched page,
        with the page text as content and ``{"source": link}`` as metadata.
        Pages that cannot be fetched or answer with an HTTP error are
        reported on stdout and skipped.
    """
    content_list = []
    # dict.fromkeys de-duplicates like set() but keeps a stable crawl order.
    for link in dict.fromkeys(links):
        if not link.startswith(index_url):
            continue

        try:
            # Without a timeout a single stalled server blocks the whole build.
            response = requests.get(link, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as error:
            # One broken page should not abort the crawl, and an error page
            # should not end up in the index.
            print("skipping " + link + ": " + str(error))
        else:
            soup = BeautifulSoup(response.content, "html.parser")
            content = soup.get_text(separator="\n")
            content_list.append(Document(page_content=content, metadata={"source": link}))

        # Be polite to the server: pause after every attempted request.
        time.sleep(1)
    return content_list
90
+
91
+
92
def get_paths(index_url, timeout=10):
    """Return the set of href values of all anchors on the index page.

    Args:
        index_url: URL of the page to download and scan.
        timeout: Seconds to wait for the request before giving up; without
            it a stalled server would block the caller indefinitely.

    Returns:
        A set with the ``href`` attribute of every ``<a>`` tag that has one.
    """
    response = requests.get(index_url, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    return {anchor.get('href') for anchor in soup.find_all('a', href=True)}
97
+
98
+
99
def get_links(index_url, paths):
    """Resolve ``paths`` against ``index_url`` and keep the crawlable links.

    Args:
        index_url: Base URL that relative paths are joined with.
        paths: Iterable of href values (relative or absolute).

    Returns:
        A list of absolute http(s) URLs whose host does not contain
        "squarespace". Fragments are stripped and duplicates removed, so
        ``/about`` and ``/about#team`` yield a single entry; first-seen order
        is kept.
    """
    links = {}
    for path in paths:
        parsed = urlparse(urljoin(index_url, path))
        if parsed.scheme not in ("http", "https") or "squarespace" in parsed.netloc:
            continue
        # A fragment addresses a position inside a page, not another page;
        # keeping it would make the crawler download the same page repeatedly.
        links[parsed._replace(fragment="").geturl()] = None
    return list(links)
107
+
108
+
109
def get_document_data(book_file, book_url):
    """Return one document per page of the PDF at ``book_file``.

    Each document holds the text extracted from the page and carries
    ``{"source": book_url}`` as metadata.
    """
    with open(book_file, 'rb') as pdf_handle:
        reader = PdfReader(pdf_handle)
        document_list = [
            Document(page_content=page.extract_text(), metadata={"source": book_url})
            for page in reader.pages
        ]

    return document_list
120
+
121
def search_index_from_docs(source_chunks, embeddings):
    """Build a FAISS search index from the text and metadata of the chunks."""
    texts = []
    metadatas = []
    for doc in source_chunks:
        texts.append(doc.page_content)
        metadatas.append(doc.metadata)
    return FAISS.from_texts(texts, embeddings, metadatas=metadatas)
126
def generate_answer(chain, index, question):
    """Run ``chain`` on the four index hits most similar to ``question``.

    Returns the ``"output_text"`` entry of the chain's result.
    """
    relevant_docs = index.similarity_search(question, k=4)
    chain_inputs = {"input_documents": relevant_docs, "question": question}
    result = chain(chain_inputs, return_only_outputs=True)
    return result["output_text"]