Spaces (status: Runtime error)
Commit 4a88e70: Duplicate from rohan13/makerlab-bot
Files changed:
- .gitattributes +34 -0
- .gitignore +2 -0
- README.md +13 -0
- __pycache__/app.cpython-39.pyc +0 -0
- __pycache__/main.cpython-39.pyc +0 -0
- __pycache__/utils.cpython-39.pyc +0 -0
- app.py +42 -0
- main.py +27 -0
- open_ai.index +3 -0
- open_ai.pkl +3 -0
- requirements.txt +11 -0
- static/chatbot.js +160 -0
- static/style.css +296 -0
- templates/index.html +35 -0
- utils.py +335 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,2 @@
env/
.idea
README.md
ADDED
@@ -0,0 +1,13 @@
---
title: Makerlab Bot
emoji: 📉
colorFrom: blue
colorTo: blue
sdk: gradio
sdk_version: 3.23.0
app_file: app.py
pinned: false
duplicated_from: rohan13/makerlab-bot
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/app.cpython-39.pyc
ADDED
Binary file (921 Bytes)
__pycache__/main.cpython-39.pyc
ADDED
Binary file (822 Bytes)
__pycache__/utils.cpython-39.pyc
ADDED
Binary file (4.14 kB)
app.py
ADDED
@@ -0,0 +1,42 @@
import traceback

from flask import Flask, render_template
from flask_executor import Executor
from flask_socketio import SocketIO, emit
from flask_cors import cross_origin, CORS
from main import run
from gevent import monkey

monkey.patch_all(ssl=False)
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, cors_allowed_origins="*", async_mode='gevent', logger=True)
cors = CORS(app)

executor = Executor(app)

executor.init_app(app)
app.config['EXECUTOR_MAX_WORKERS'] = 10

@app.route('/')
def index():
    return render_template('index.html')


@socketio.on('message')
def handle_message(data):
    question = data['question']
    print("question: " + question)

    if executor.futures:
        emit('response', {'response': 'Server is busy, please try again later'})
        return

    try:
        future = executor.submit(run, question)
        response = future.result()
        emit('response', {'response': response})
    except Exception as e:
        traceback.print_exc()
        emit('response', {'response': 'Server is busy. Please try again later.'})

if __name__ == '__main__':
    socketio.run(app, host="0.0.0.0", port=7860)
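For reference, the 'message'/'response' Socket.IO events handled in app.py can be exercised without the browser UI. The following is a minimal sketch, assuming the python-socketio client package is installed (it is not listed in requirements.txt) and the server is running locally on port 7860:

# Hypothetical test client for the Socket.IO events defined in app.py; not part of this commit.
# Assumes: pip install "python-socketio[client]" and a server listening on localhost:7860.
import socketio

sio = socketio.Client()

@sio.on('response')
def on_response(data):
    # The server emits {'response': <answer text>} for each submitted question.
    print("bot:", data['response'])
    sio.disconnect()

sio.connect('http://localhost:7860')
sio.emit('message', {'question': 'What is Makerlab?'})
sio.wait()  # block until the response handler disconnects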
main.py
ADDED
@@ -0,0 +1,27 @@
from utils import create_index, get_agent_chain, get_prompt_and_tools, get_search_index
from utils import get_custom_agent, get_prompt_and_tools_for_custom_agent
question_starters = ['who', 'why', 'what', 'how', 'where', 'when', 'which', 'whom', 'whose']

def run(question):

    index = get_search_index()

    # prompt, tools = get_prompt_and_tools()

    # agent_chain = get_agent_chain(prompt, tools)

    prompt, tools = get_prompt_and_tools_for_custom_agent()

    agent_chain = get_custom_agent(prompt, tools)

    result = None

    try:
        result = agent_chain.run(question)
        print(result)
    except ValueError as ve:
        if "Could not parse LLM output:" in ve.args[0] and question.lower().startswith(tuple(question_starters)) and not question.lower().endswith('?'):
            question = question + '?'
            result = agent_chain.run(question)

    return result
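A quick way to try this pipeline without the web layer is to call run() directly. The snippet below is a sketch, assuming OPENAI_API_KEY is set in the environment (the OpenAI wrapper created in utils.py reads it) and that open_ai.pkl/open_ai.index are present or can be rebuilt:

# Hypothetical interactive check of main.run(); not part of this commit.
import os

assert os.environ.get("OPENAI_API_KEY"), "the LangChain OpenAI wrapper needs this variable set"

from main import run

answer = run("What are the 3D printing prices at Makerlab?")
print(answer)  # expected to include a "SOURCES:" section when the Vectorstore tool was used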
open_ai.index
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f5a87eb7dbbfd3245fc8025fb2467723bdcba8cdf308127050de9f8bbdeb21bc
size 2838573
open_ai.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7c399ec43dab5c11fabcc598d507b88db77a59b019c1b2533f2792199c6a1fcc
size 3171039
requirements.txt
ADDED
@@ -0,0 +1,11 @@
faiss-cpu==1.7.3
langchain==0.0.131
beautifulsoup4==4.12.0
PyPDF2==3.0.1
openai==0.27.4
flask==2.2.3
flask-socketio==5.3.3
flask-cors==3.0.10
flask-executor==1.0.0
gevent==22.10.2
gevent-websocket==0.10.1
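With these pinned versions, a local run of the Space would typically be `pip install -r requirements.txt` followed by `python app.py`, which starts the Flask-SocketIO server on port 7860. Note that gevent and gevent-websocket are needed at runtime because app.py calls monkey.patch_all() and creates the SocketIO server with async_mode='gevent'.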
static/chatbot.js
ADDED
@@ -0,0 +1,160 @@
$(document).ready(function() {
    // Initialize variables
    var $chatContainer = $('.chat-container');
    var $chatHeader = $('.chat-header');
    var $chatBody = $('.chat-body');
    var $chatInput = $('.chat-input');
    var $input = $('.chat-input input');
    var $submit = $('.chat_submit');
    var session_id = '';
    $chatBody.children().each(function() {
        $(this).addClass('chat-message');
    });
    const buttonLabels = ["What is Makerlab?", "What is 3D printing?",
        "Who are the founders of Makerlab?", "What are the 3D printing prices at Makerlab?",
        "How can I host a birthday at Makerlab?", "Can I book an appointment at Makerlab?",
        "Tell me about softwares used to create 3D printing designs", "Hi, I am bob. Tell me when Makerlab was founded.",
        "Can I get my custom designs 3D printed at Makerlab?", "Can I host a private event at Makerlab?",
        "Does Makerlab host any workshop?", "When is Makerlab open?", "How can I contact the Makerlab Team?"];

    // Initialize SocketIO connection
    var socket = io.connect('https://' + document.domain + ':' + location.port);
    const container = document.getElementById("button-container");

    for (let i = 0; i < buttonLabels.length; i++) {
        const button = document.createElement("button");
        button.innerHTML = buttonLabels[i];
        button.setAttribute("class", "queries");
        button.setAttribute("id", `button-${i}`);
        button.style.margin = "5px";
        container.appendChild(button);
    }
    scrollButtons();

    // Function to send message to Flask-SocketIO app
    function sendMessage(message) {
        console.log("message: " + message )
        socket.emit('message', {'question': message});
    }

    // Function to display message
    function displayMessage(message, isUser, hasHtml) {
        var $message = $('<div>').addClass('chat-message round');
        if (hasHtml) {
            $messageText = $('<p>').html(message);
        } else {
            $messageText = $('<p>').html(message.replace(/(https?:\/\/[^\s,]+)/g, '<a href="$1" target="_blank">$1</a>').replace(/(SOURCES:)/, '<br>$1'));
        }
        // var $messageText = $('<p>').html(message.replace(/(https?:\/\/[^\s,]+)/g, '<a href="$1">$1</a>'));

        $message.append($messageText);
        if (isUser) {
            $message.addClass('user');
        } else {
            $message.addClass('bot')
        }
        if ($chatBody) {
            $chatBody.append($message);
            if ($chatBody[0]) {
                $chatBody.animate({scrollTop: $chatBody[0].scrollHeight}, 300);
            }
        } else {
            $('.chat-container').append($message);
            $('.chat-container').animate({scrollTop: 0}, 300);
        }
    }

    socket.on('response', function(data) {
        console.log("Received response: " + data.response)
        var response = data.response;
        displayMessage(response, false);
    });

    // Send message on submit
    $submit.click(function(event) {
        event.preventDefault();
        var message = $input.val().trim();
        console.log("Submit clicked: " + message)
        if (message !== '') {
            displayMessage(message, true);
            sendMessage(message);
            $input.val('');
        }
    });

    // Send message on enter key press
    $input.keydown(function(event) {
        if (event.keyCode === 13) {
            event.preventDefault();
            $submit.click();
        }
    });

    // Initial message
    displayMessage('Learn about <a href="https://makerlab.illinois.edu/" target="_blank">Makerlab</a>', false, true);
    displayMessage('This bot is powered by Gpt 3.5 and thus may get things wrong. You can click on any of the questions on the left and they will get copied to this window. You can also type in a question here. If you find the bot useful or have feedback on improving it, drop us a line at <a href="https://makerlab.illinois.edu/contact" target="_blank">Makerlab - Contact</a>', false, true);

    // Function to minimize the widget
    function minimizeWidget() {
        $chatContainer.addClass('minimized');
        $chatHeader.hide();
        $chatBody.hide()
        $chatInput.hide();
        $chatContainer.append('<div class="chat-bot-icon"><i class="fa fa-android"></i></div>');
    }

    // Function to maximize the widget
    function maximizeWidget() {
        $chatContainer.removeClass('minimized');
        $chatBody.show()
        $chatHeader.show();
        $chatInput.show();
        $('.chat-bot-icon').remove();
    }

    // Minimize the widget on click of close button
    $chatHeader.find('.chat-close').click(function() {
        minimizeWidget();
    });

    // Maximize the widget on click of chat-bot-icon
    $chatContainer.on('click', '.chat-bot-icon', function() {
        maximizeWidget();
    });

    // Add event listener to each button
    $('.queries').click(function() {
        // Set the value of the input field to the text content of the clicked button
        $('input[type="text"]').val($(this).text());
    });

    function scrollButtons() {
        var container = document.getElementById("button-container");
        var buttons = container.querySelectorAll(".queries");
        var current = 0;

        var scrollInterval = setInterval(function() {
            buttons[current].scrollIntoView({ behavior: "smooth", block: "nearest", inline: "center" });
            current = (current + 1) % buttons.length;
        }, 1000);

        container.addEventListener("mouseenter", function() {
            clearInterval(scrollInterval);
        });

        container.addEventListener("mouseleave", function() {
            scrollInterval = setInterval(function() {
                buttons[current].scrollIntoView({ behavior: "smooth", block: "nearest", inline: "center" });
                current = (current + 1) % buttons.length;
            }, 1000);
        });
    }

});
static/style.css
ADDED
@@ -0,0 +1,296 @@
.chat-container {
    position: fixed;
    bottom: 30px;
    right: 30px;
    z-index: 999;
    background-color: #fff;
    border-radius: 10px;
    box-shadow: 0px 0px 20px rgba(0, 0, 0, 0.2);
    max-width: 400px;
    min-width: 400px;
}

.round {
    border-radius: 10px;
    -webkit-border-radius: 10px;
    -moz-border-radius: 30px;
}

.chat-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 10px;
    background-color: darkblue;
    color: #fff;
    border-top-left-radius: 10px;
    border-top-right-radius: 10px;
}

.chat-header h4 {
    margin: 0;
}

.chat-close {
    cursor: pointer;
}

.chat-body {
    height: 400px;
    overflow-y: scroll;
    padding: 10px;
    word-wrap: break-word;
    display: flex;
    flex-direction: column;
}

.chat-message {
    margin: 10px;
}

.chat-message p {
    margin: 0;
    padding: 10px;
    font-size: 16px;
    line-height: 1.4;
    position: relative;
    word-wrap: break-word;
    border-radius: 10px;
    color: #fff;
}

.chat-message.user {
    display: flex;
    align-self: flex-end;
    justify-content: flex-end;
    text-align: right;
    align-items: center;
    background-color: rgba(0, 0, 139, 0.75);
    border-top-right-radius: 0px;
    border-bottom-right-radius: 0px;
    border-bottom-left-radius: 10px;
    word-wrap: break-word;
}

.chat-message.bot {
    display: flex;
    align-self: flex-start;
    justify-content: flex-start;
    text-align: left;
    align-items: center;
    background-color: rgba(0, 0, 139, 0.75);
    border-top-left-radius: 0px;
    border-bottom-right-radius: 10px;
    border-bottom-left-radius: 0px;
    word-wrap: break-word;
}

.chat-message.bot p {
    margin: 0;
    padding: 10px;
    font-size: 16px;
    line-height: 1.4;
    position: relative;
    word-wrap: break-word;
    border-radius: 10px;
    overflow-wrap: anywhere;
}

.chat-message.user:after {
    content: "";
    position: relative;
    top: 0;
    right: -15px;
    width: 0;
    height: 0;
    border-top: 15px solid transparent;
    border-bottom: 15px solid transparent;
    border-left: 16px solid #00008BBF;
    border-top-right-radius: 10px;
}

.chat-message.bot:before {
    content: "";
    position: relative;
    top: 0;
    left: -15px;
    width: 0;
    height: 0;
    border-top: 15px solid transparent;
    border-bottom: 15px solid transparent;
    border-right: 15px solid #00008BBF;
    border-top-left-radius: 10px;
}

.chat-input {
    display: flex;
    margin-top: 10px;
}

.chat-input input {
    flex-grow: 1;
    border: none;
    border-radius: 5px;
    padding: 8px 10px;
    font-size: 16px;
    margin-right: 10px;
    box-shadow: 0px 0px 5px rgba(0, 0, 0, 0.1);
}

.chat-input button {
    background-color: #00008BBF;
    color: #fff;
    border: none;
    border-radius: 5px;
    padding: 8px 10px;
    font-size: 16px;
    cursor: pointer;
    box-shadow: 0px 0px 5px rgba(0, 0, 0, 0.1);
}

/* CSS for chat-container when minimized */
.chat-container.minimized {
    min-width: 70px;
    height: 70px;
    border-radius: 50%;
    position: fixed;
    bottom: 10px;
    right: 10px;
    z-index: 9999;
    background-color: #fff;
    box-shadow: 0px 2px 10px rgba(0, 0, 0, 0.3);
    cursor: pointer;
}

/* CSS for chat-bot-icon */
.chat-bot-icon {
    font-size: 30px;
    color: #00008BBF;
    position: absolute;
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
}

/* CSS for chat-header when not minimized */
.chat-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    background-color: #6c7ae0;
    color: #fff;
    padding: 10px;
    border-top-left-radius: 5px;
    border-top-right-radius: 5px;
}

/* CSS for chat-container when not minimized */
.chat-container:not(.minimized) {
    border-radius: 5px;
    position: fixed;
    bottom: 10px;
    right: 10px;
    z-index: 9999;
    background-color: #fff;
    box-shadow: 0px 2px 10px rgba(0, 0, 0, 0.3);
}

/* CSS for chat-bot-icon when chat-container is minimized */
.chat-container.minimized .chat-bot-icon {
    display: block;
}

/* CSS for chat-bot-icon when chat-container is not minimized */
.chat-container:not(.minimized) .chat-bot-icon {
    display: none;
}

.queries {
    padding: 8px 12px;
    font-size: 16px;
    font-weight: bold;
    text-align: center;
    text-decoration: none;
    border: 0.5px solid #a5a0a0;
    border-radius: 20px;
    color: #000;
    background-color: #343a404a;
    cursor: pointer;
    margin: 5px;
}

.queries:hover {
    background-color: #343a40ad;
}

.queries:active {
    background-color: #0053a4;
}

#button-container {
    display: flex;
    position: relative;
    left: 2%;
    top: 40%;
    flex-direction: column;
    justify-content: inherit;
    align-items: center;
    width: auto;
    overflow-y: scroll;
    max-height: 350px;
    padding-top: 110%;
    margin-top: 2%;
}

#button-container button {
    margin-bottom: 10px;
}

.query-heading {
    display: flex;
    position: relative;
    background-color: #fff;
    padding: 10px;
    z-index: 1;
    justify-content: inherit;
    width: 100%;
    border-bottom: 1px solid #2f4f4f5e;
}

.sample-query {
    display: flex;
    position: absolute;
    left: 30%;
    top: 10%;
    flex-direction: column;
    justify-content: flex-start;
    align-items: center;
    width: auto;
    padding: 10px;
    border: 1px solid #2f4f4f5e;
    justify-content: center;
    border-radius: 10px;
    max-width: 30%;
}

::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background-color: #f4f4f4;
}

::-webkit-scrollbar-thumb {
    background-color: #a3bfe9a6;
    border-radius: 20px;
}
templates/index.html
ADDED
@@ -0,0 +1,35 @@
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>MakerlabX3DPrinting QA</title>
    <meta http-equiv="Content-Security-Policy" content="upgrade-insecure-requests">
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
    <link rel="stylesheet" href="static/style.css">
</head>
<body>
<div class = "sample-query">
    <div class="query-heading"><h4>Sample Queries</h4></div>
    <div id="button-container">
    </div></div>
<div class="chat-container">
    <div class="chat-header">
        <h4>Makerlab Q&A Bot</h4>
        <i class="fa fa-close chat-close"></i>
    </div>
    <div class="chat-bot-icon">
        <i class="fa fa-android"></i> <!-- Replace with your bot icon -->
    </div>
    <div class="chat-body chat-messages round"></div>
    <div class="chat-input">
        <input type="text" placeholder="Type your message">
        <button class="chat_submit">Send</button>
    </div>
</div>
<!--<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.5.1/socket.io.js" integrity="sha512-sY2t8W1xNQ2yB+1RFXJv+wwhdN7CHX9Z+fhM7JH/3B3q1x7VJBOwKe+zb7VW0EC8XG5M5rjBQd7+47F5fQlhKQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>-->
<script src="https://cdn.socket.io/4.4.1/socket.io.min.js"></script>
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script src="{{ url_for('static', filename='chatbot.js') }}"></script>
</body>
</html>
utils.py
ADDED
@@ -0,0 +1,335 @@
import os
import pickle
import re
import time
from typing import List, Union
from urllib.parse import urlparse, urljoin

import faiss
import requests
from PyPDF2 import PdfReader
from bs4 import BeautifulSoup
from langchain import OpenAI, LLMChain
from langchain.agents import ConversationalAgent
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import BaseChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferWindowMemory
from langchain.schema import AgentAction, AgentFinish, HumanMessage
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

book_url = 'https://g.co/kgs/2VFC7u'
book_file = "Book.pdf"
url = 'https://makerlab.illinois.edu/'

pickle_file = "open_ai.pkl"
index_file = "open_ai.index"

gpt_3_5 = OpenAI(model_name='gpt-3.5-turbo', temperature=0)

embeddings = OpenAIEmbeddings()

chat_history = []

memory = ConversationBufferWindowMemory(memory_key="chat_history")

gpt_3_5_index = None

class CustomOutputParser(AgentOutputParser):

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        # Check if agent replied without using tools
        if "AI:" in llm_output:
            return AgentFinish(return_values={"output": llm_output.split("AI:")[-1].strip()},
                               log=llm_output)
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)

# Set up a prompt template
class CustomPromptTemplate(BaseChatPromptTemplate):
    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format_messages(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]

def get_search_index():
    global gpt_3_5_index
    if os.path.isfile(pickle_file) and os.path.isfile(index_file) and os.path.getsize(pickle_file) > 0:
        # Load index from pickle file
        with open(pickle_file, "rb") as f:
            search_index = pickle.load(f)
    else:
        search_index = create_index()

    gpt_3_5_index = search_index
    return search_index


def create_index():
    source_chunks = create_chunk_documents()
    search_index = search_index_from_docs(source_chunks)
    faiss.write_index(search_index.index, index_file)
    # Save index to pickle file
    with open(pickle_file, "wb") as f:
        pickle.dump(search_index, f)
    return search_index


def create_chunk_documents():
    sources = fetch_data_for_embeddings(url, book_file, book_url)
    # print("sources" + str(len(sources)))

    splitter = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)

    source_chunks = splitter.split_documents(sources)

    for chunk in source_chunks:
        print("Size of chunk: " + str(len(chunk.page_content) + len(chunk.metadata)))
        if chunk.page_content is None or chunk.page_content == '':
            print("removing chunk: " + chunk.page_content)
            source_chunks.remove(chunk)
        elif len(chunk.page_content) >= 1000:
            print("splitting document")
            source_chunks.extend(splitter.split_documents([chunk]))
    # print("Chunks: " + str(len(source_chunks)) + "and type " + str(type(source_chunks)))
    return source_chunks


def fetch_data_for_embeddings(url, book_file, book_url):
    sources = get_website_data(url)
    sources.extend(get_document_data(book_file, book_url))
    return sources

def get_website_data(index_url):
    # Get all page paths from index
    paths = get_paths(index_url)

    # Filter out invalid links and join them with the base URL
    links = get_links(index_url, paths)

    return get_content_from_links(links, index_url)


def get_content_from_links(links, index_url):
    content_list = []
    for link in set(links):
        if link.startswith(index_url):
            page_data = requests.get(link).content
            soup = BeautifulSoup(page_data, "html.parser")

            # Get page content
            content = soup.get_text(separator="\n")
            # print(link)

            # Get page metadata
            metadata = {"source": link}

            content_list.append(Document(page_content=content, metadata=metadata))
            time.sleep(1)
    # print("content list" + str(len(content_list)))
    return content_list


def get_paths(index_url):
    index_data = requests.get(index_url).content
    soup = BeautifulSoup(index_data, "html.parser")
    paths = set([a.get('href') for a in soup.find_all('a', href=True)])
    return paths


def get_links(index_url, paths):
    links = []
    for path in paths:
        url = urljoin(index_url, path)
        parsed_url = urlparse(url)
        if parsed_url.scheme in ["http", "https"] and "squarespace" not in parsed_url.netloc:
            links.append(url)
    return links


def get_document_data(book_file, book_url):
    document_list = []
    with open(book_file, 'rb') as f:
        pdf_reader = PdfReader(f)
        for i in range(len(pdf_reader.pages)):
            page_text = pdf_reader.pages[i].extract_text()
            metadata = {"source": book_url}
            document_list.append(Document(page_content=page_text, metadata=metadata))

    # print("document list" + str(len(document_list)))
    return document_list

def search_index_from_docs(source_chunks):
    # Create index from chunk documents
    # print("Size of chunk" + str(len(source_chunks)))
    search_index = FAISS.from_texts([doc.page_content for doc in source_chunks], embeddings, metadatas=[doc.metadata for doc in source_chunks])
    return search_index


def get_qa_chain(gpt_3_5_index):
    global gpt_3_5
    print("index: " + str(gpt_3_5_index))
    return ConversationalRetrievalChain.from_llm(gpt_3_5, chain_type="stuff", get_chat_history=get_chat_history,
                                                 retriever=gpt_3_5_index.as_retriever(), return_source_documents=True, verbose=True)

def get_chat_history(inputs) -> str:
    res = []
    for human, ai in inputs:
        res.append(f"Human:{human}\nAI:{ai}")
    return "\n".join(res)


def generate_answer(question) -> str:
    global chat_history, gpt_3_5_index
    gpt_3_5_chain = get_qa_chain(gpt_3_5_index)
    result = gpt_3_5_chain(
        {"question": question, "chat_history": chat_history, "vectordbkwargs": {"search_distance": 0.8}})
    print("Result: " + str(result))
    chat_history = [(question, result["answer"])]
    sources = []

    for document in result['source_documents']:
        source = document.metadata['source']
        sources.append(source)

    source = ',\n'.join(set(sources))
    return result['answer'] + '\nSOURCES: ' + source


def get_agent_chain(prompt, tools):
    global gpt_3_5
    # output_parser = CustomOutputParser()
    llm_chain = LLMChain(llm=gpt_3_5, prompt=prompt)
    agent = ConversationalAgent(llm_chain=llm_chain, tools=tools, verbose=True)
    agent_chain = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory,
                                                     intermediate_steps=True)
    return agent_chain


def get_prompt_and_tools():
    tools = get_tools()

    prefix = """Have a conversation with a human, answering the following questions as best you can.
Always try to use Vectorstore first.
Your name is Makerlab Bot because you are a personal assistant of Makerlab. You have access to the following tools:"""
    suffix = """Begin! If you use any tool, ALWAYS return a "SOURCES" part in your answer"

{chat_history}
Question: {input}
{agent_scratchpad}
SOURCES:"""
    prompt = ConversationalAgent.create_prompt(
        tools,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input", "chat_history", "agent_scratchpad"]
    )
    # print("Template: " + prompt.template)
    return prompt, tools


def get_tools():
    tools = [
        Tool(
            name="Vectorstore",
            func=generate_answer,
            description="useful for when you need to answer questions about the Makerlab or 3D Printing.",
            return_direct=True
        )]
    return tools

def get_custom_agent(prompt, tools):

    llm_chain = LLMChain(llm=gpt_3_5, prompt=prompt)

    output_parser = CustomOutputParser()
    tool_names = [tool.name for tool in tools]
    agent = LLMSingleActionAgent(
        llm_chain=llm_chain,
        output_parser=output_parser,
        stop=["\nObservation:"],
        allowed_tools=tool_names
    )
    agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory,
                                                        intermediate_steps=True)
    return agent_executor

def get_prompt_and_tools_for_custom_agent():
    template = """
Have a conversation with a human, answering the following questions as best you can.
Always try to use Vectorstore first.
Your name is Makerlab Bot because you are a personal assistant of Makerlab. You have access to the following tools:

{tools}

To answer for the new input, use the following format:

New Input: the input question you must answer
Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question. SOURCES: the sources referred to find the final answer


When you have a response to say to the Human and DO NOT need to use a tool:
1. DO NOT return "SOURCES" if you did not use any tool.
2. You MUST use this format:
```
Thought: Do I need to use a tool? No
AI: [your response here]
```

Begin! Remember to speak as a personal assistant when giving your final answer.
ALWAYS return a "SOURCES" part in your answer, if you used any tool.

Previous conversation history:
{chat_history}
New input: {input}
{agent_scratchpad}
SOURCES:"""
    tools = get_tools()
    prompt = CustomPromptTemplate(
        template=template,
        tools=tools,
        # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
        # This includes the `intermediate_steps` variable because that is needed
        input_variables=["input", "intermediate_steps", "chat_history"]
    )
    return prompt, tools
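The committed open_ai.pkl and open_ai.index files are the LFS artifacts produced by create_index() above. If they were missing, one way to rebuild them and sanity-check retrieval would look roughly like the sketch below; this is an assumption based on the functions in utils.py, not a script in this commit, and it requires OPENAI_API_KEY to be set, Book.pdf to be present next to utils.py, and network access to makerlab.illinois.edu.

# Hypothetical one-off rebuild of the vector index; not part of this commit.
from utils import create_index, get_search_index, generate_answer

create_index()        # crawls the site and the PDF, writes open_ai.pkl and open_ai.index
get_search_index()    # loads the pickle and sets the module-level gpt_3_5_index
print(generate_answer("When is Makerlab open?"))  # direct retrieval QA, bypassing the agent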