Spaces:
Runtime error
Runtime error
Merge branch 'main' into general_fixes
Browse files- README.md +118 -13
- example.ipynb +183 -37
- megabots/__init__.py +7 -222
- megabots/bot.py +207 -0
- megabots/memory.py +45 -0
- megabots/prompt.py +18 -0
- megabots/utils.py +39 -0
- megabots/{vectorstores.py → vectorstore.py} +6 -4
- setup.py +1 -1
- tests/test_memory.py +25 -0
README.md
CHANGED
@@ -5,7 +5,6 @@
|
|
5 |
[](https://github.com/psf/black)
|
6 |
[](https://github.com/momegas/megabots/blob/main/LICENCE)
|
7 |

|
8 |
-
<a href="https://www.producthunt.com/posts/megabots-2?utm_source=badge-featured&utm_medium=badge&utm_souce=badge-megabots-2" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=390033&theme=light" alt="Megabots - π€ Production ready full-stack LLM apps made mega-easy | Product Hunt" style="width: 150px; height: 34px;" width="250" height="54" /></a>
|
9 |
|
10 |
|
11 |
🤖 Megabots provides State-of-the-art, production ready LLM apps made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫡
|
@@ -63,13 +62,115 @@ qnabot = bot("qna-over-docs", index="./index")
|
|
63 |
|
64 |
# Change the model
|
65 |
qnabot = bot("qna-over-docs", model="text-davinci-003")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
71 |
```
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
You can also create a FastAPI app that will expose the bot as an API using the create_app function.
|
74 |
Assuming your file is called `main.py` run `uvicorn main:app --reload` to run the API locally.
|
75 |
You should then be able to visit `http://localhost:8000/docs` to see the API documentation.
|
@@ -80,6 +181,8 @@ from megabots import bot, create_api
|
|
80 |
app = create_app(bot("qna-over-docs"))
|
81 |
```
|
82 |
|
|
|
|
|
83 |
You can expose a gradio UI for the bot using `create_interface` function.
|
84 |
Assuming your file is called `ui.py` run `gradio qnabot/ui.py` to run the UI locally.
|
85 |
You should then be able to visit `http://127.0.0.1:7860` to see the API documentation.
|
@@ -94,14 +197,16 @@ demo = create_interface(bot("qna-over-docs"))
|
|
94 |
|
95 |
The `bot` function should serve as the starting point for creating and customising your bot. Below is a list of the available arguments in `bot`.
|
96 |
|
97 |
-
| Argument
|
98 |
-
|
|
99 |
-
| task
|
100 |
-
| index
|
101 |
-
| model
|
102 |
-
|
|
103 |
-
|
|
104 |
-
|
|
|
|
|
|
105 |
|
106 |
## How QnA bot works
|
107 |
|
|
|
5 |
[](https://github.com/psf/black)
|
6 |
[](https://github.com/momegas/megabots/blob/main/LICENCE)
|
7 |

|
|
|
8 |
|
9 |
|
10 |
🤖 Megabots provides State-of-the-art, production ready LLM apps made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫡
|
|
|
62 |
|
63 |
# Change the model
|
64 |
qnabot = bot("qna-over-docs", model="text-davinci-003")
|
65 |
+
```
|
66 |
+
|
67 |
+
## Changing the bot's prompt
|
68 |
+
|
69 |
+
You can change the bot's prompt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knowledge searched from the index) and the `question` (the human question).
|
70 |
+
|
71 |
+
```python
|
72 |
+
from megabots import bot
|
73 |
+
|
74 |
+
prompt = """
|
75 |
+
Use the following pieces of context to answer the question at the end.
|
76 |
+
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
77 |
+
Answer in the style of Tony Stark.
|
78 |
+
|
79 |
+
{context}
|
80 |
+
|
81 |
+
Question: {question}
|
82 |
+
Helpful humorous answer:"""
|
83 |
+
|
84 |
+
qnabot = bot("qna-over-docs", index="./index.pkl", prompt=prompt)
|
85 |
+
|
86 |
+
qnabot.ask("what was the first roster of the avengers?")
|
87 |
+
```
|
88 |
+
|
89 |
+
## Working with memory
|
90 |
+
|
91 |
+
You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used, which is then configured with sane defaults.
|
92 |
+
Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need.
|
93 |
+
|
94 |
+
```python
|
95 |
+
from megabots import bot
|
96 |
+
|
97 |
+
qnabot = bot("qna-over-docs", index="./index.pkl", memory="conversation-buffer")
|
98 |
+
|
99 |
+
print(qnabot.ask("who is iron man?"))
|
100 |
+
print(qnabot.ask("was he in the first roster?"))
|
101 |
+
# Bot should understand who "he" refers to.
|
102 |
+
```
|
103 |
+
|
104 |
+
Or using the `memory` factory function
|
105 |
+
|
106 |
+
```python
|
107 |
+
from megabots import bot, memory
|
108 |
+
|
109 |
+
mem = memory("conversation-buffer-window", k=5)
|
110 |
+
|
111 |
+
qnabot = bot("qna-over-docs", index="./index.pkl", memory=mem)
|
112 |
+
|
113 |
+
print(qnabot.ask("who is iron man?"))
|
114 |
+
print(qnabot.ask("was he in the first roster?"))
|
115 |
+
```
|
116 |
+
|
117 |
+
NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to accommodate the chat history. The variable name is `history`.
|
118 |
+
|
119 |
+
```python
|
120 |
+
from megabots import bot
|
121 |
+
|
122 |
+
prompt = """
|
123 |
+
Use the following pieces of context to answer the question at the end.
|
124 |
+
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
125 |
+
|
126 |
+
{context}
|
127 |
|
128 |
+
{history}
|
129 |
+
Human: {question}
|
130 |
+
AI:"""
|
131 |
+
|
132 |
+
qnabot = bot("qna-over-docs", prompt=prompt, index="./index.pkl", memory="conversation-buffer")
|
133 |
+
|
134 |
+
print(qnabot.ask("who is iron man?"))
|
135 |
+
print(qnabot.ask("was he in the first roster?"))
|
136 |
```
|
137 |
|
138 |
+
## Using Megabots with Milvus (more DBs coming soon)
|
139 |
+
|
140 |
+
Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below.
|
141 |
+
|
142 |
+
In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it.
|
143 |
+
The command is:
|
144 |
+
|
145 |
+
```bash
|
146 |
+
wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py
|
147 |
+
```
|
148 |
+
|
149 |
+
You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus
|
150 |
+
|
151 |
+
```python
|
152 |
+
from megabots import bot
|
153 |
+
|
154 |
+
# Attach a vectorstore by passing the name of the database. Default port for milvus is 19530 and default host is localhost
|
155 |
+
# Point it to your files directory so that it can index the files and add them to the vectorstore
|
156 |
+
bot = bot("qna-over-docs", index="./examples/files/", vectorstore="milvus")
|
157 |
+
|
158 |
+
bot.ask("what was the first roster of the avengers?")
|
159 |
+
```
|
160 |
+
|
161 |
+
Or use the `vectorstore` factory function for more customisation
|
162 |
+
|
163 |
+
```python
|
164 |
+
|
165 |
+
from megabots import bot, vectorstore
|
166 |
+
|
167 |
+
milvus = vectorstore("milvus", host="localhost", port=19530)
|
168 |
+
|
169 |
+
bot = bot("qna-over-docs", index="./examples/files/", vectorstore=milvus)
|
170 |
+
```
|
171 |
+
|
172 |
+
## Exposing an API with FastAPI
|
173 |
+
|
174 |
You can also create a FastAPI app that will expose the bot as an API using the create_app function.
|
175 |
Assuming your file is called `main.py` run `uvicorn main:app --reload` to run the API locally.
|
176 |
You should then be able to visit `http://localhost:8000/docs` to see the API documentation.
|
|
|
181 |
app = create_app(bot("qna-over-docs"))
|
182 |
```
|
183 |
|
184 |
+
## Exposing a Gradio chat-like interface
|
185 |
+
|
186 |
You can expose a gradio UI for the bot using `create_interface` function.
|
187 |
Assuming your file is called `ui.py` run `gradio qnabot/ui.py` to run the UI locally.
|
188 |
You should then be able to visit `http://127.0.0.1:7860` to see the chat UI.
|
|
|
197 |
|
198 |
The `bot` function should serve as the starting point for creating and customising your bot. Below is a list of the available arguments in `bot`.
|
199 |
|
200 |
+
| Argument | Description |
|
201 |
+
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
202 |
+
| task | The type of bot to create. Available options: `qna-over-docs`. More coming soon. |
|
203 |
+
| index | Specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of the directory the index will be automatically created. If no index is specified `bot` will look for `index.pkl` or `./index` |
|
204 |
+
| model | The name of the model to use for the bot. You can specify a different model by providing its name, like "text-davinci-003". Supported models: `gpt-3.5-turbo` (default), `text-davinci-003`. More coming soon. |
|
205 |
+
| prompt | A string template for the prompt, which defines the format of the question and context passed to the model. The template should include placeholder variables like so: `{context}`, `{question}` and, in the case of using memory, `{history}`. |
|
206 |
+
| memory | The type of memory to be used by the bot. Can be a string with the type of the memory or you can use `memory` factory function. Supported memories: `conversation-buffer`, `conversation-buffer-window` |
|
207 |
+
| vectorstore | The vectorstore to be used for the index. Can be a string with the name of the database or you can use `vectorstore` factory function. Supported DBs: `milvus`. |
|
208 |
+
|
209 |
+
| sources | When `sources` is `True` the bot will also include sources in the response. A known [issue](https://github.com/hwchase17/langchain/issues/2858) exists, where if you pass a custom prompt with sources the code breaks. |
|
210 |
|
211 |
## How QnA bot works
|
212 |
|
example.ipynb
CHANGED
@@ -7,17 +7,7 @@
|
|
7 |
"source": [
|
8 |
"# Examples\n",
|
9 |
"\n",
|
10 |
-
"Below you can find some examples of how to use the π€ `Megabots` library
|
11 |
-
]
|
12 |
-
},
|
13 |
-
{
|
14 |
-
"cell_type": "code",
|
15 |
-
"execution_count": 13,
|
16 |
-
"metadata": {},
|
17 |
-
"outputs": [],
|
18 |
-
"source": [
|
19 |
-
"from megabots import bot\n",
|
20 |
-
"from dotenv import load_dotenv"
|
21 |
]
|
22 |
},
|
23 |
{
|
@@ -34,9 +24,17 @@
|
|
34 |
},
|
35 |
{
|
36 |
"cell_type": "code",
|
37 |
-
"execution_count":
|
38 |
"metadata": {},
|
39 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
{
|
41 |
"name": "stdout",
|
42 |
"output_type": "stream",
|
@@ -51,14 +49,17 @@
|
|
51 |
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
52 |
]
|
53 |
},
|
54 |
-
"execution_count":
|
55 |
"metadata": {},
|
56 |
"output_type": "execute_result"
|
57 |
}
|
58 |
],
|
59 |
"source": [
|
|
|
|
|
60 |
"qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\")\n",
|
61 |
-
"
|
|
|
62 |
]
|
63 |
},
|
64 |
{
|
@@ -68,12 +69,12 @@
|
|
68 |
"source": [
|
69 |
"### Changing the bot's prompt\n",
|
70 |
"\n",
|
71 |
-
"You can change the bot's
|
72 |
]
|
73 |
},
|
74 |
{
|
75 |
"cell_type": "code",
|
76 |
-
"execution_count":
|
77 |
"metadata": {},
|
78 |
"outputs": [
|
79 |
{
|
@@ -87,33 +88,29 @@
|
|
87 |
{
|
88 |
"data": {
|
89 |
"text/plain": [
|
90 |
-
"
|
91 |
]
|
92 |
},
|
93 |
-
"execution_count":
|
94 |
"metadata": {},
|
95 |
"output_type": "execute_result"
|
96 |
}
|
97 |
],
|
98 |
"source": [
|
99 |
-
"
|
|
|
|
|
100 |
"Use the following pieces of context to answer the question at the end. \n",
|
101 |
"If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
|
102 |
-
"
|
103 |
-
"
|
104 |
"{context}\n",
|
105 |
"\n",
|
106 |
"Question: {question}\n",
|
107 |
"Helpful humorous answer:\"\"\"\n",
|
108 |
"\n",
|
109 |
-
"
|
110 |
"\n",
|
111 |
-
"qnabot = bot(\n",
|
112 |
-
" \"qna-over-docs\",\n",
|
113 |
-
" index=\"./index.pkl\",\n",
|
114 |
-
" prompt_template=prompt_template,\n",
|
115 |
-
" prompt_variables=[\"context\", \"question\"],\n",
|
116 |
-
")\n",
|
117 |
"qnabot.ask(\"what was the first roster of the avengers?\")\n"
|
118 |
]
|
119 |
},
|
@@ -128,16 +125,17 @@
|
|
128 |
"\n",
|
129 |
"In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it.\n",
|
130 |
"The command is:\n",
|
131 |
-
"
|
132 |
"```bash\n",
|
133 |
"wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py\n",
|
134 |
"```\n",
|
135 |
-
"
|
|
|
136 |
]
|
137 |
},
|
138 |
{
|
139 |
"cell_type": "code",
|
140 |
-
"execution_count":
|
141 |
"metadata": {},
|
142 |
"outputs": [
|
143 |
{
|
@@ -153,22 +151,170 @@
|
|
153 |
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
154 |
]
|
155 |
},
|
156 |
-
"execution_count":
|
157 |
"metadata": {},
|
158 |
"output_type": "execute_result"
|
159 |
}
|
160 |
],
|
161 |
"source": [
|
162 |
-
"from megabots import bot
|
163 |
-
"\n",
|
164 |
-
"# Create a vectorstore object. Default port is 19530 and default host is localhost\n",
|
165 |
-
"milvus = vectorstore(\"milvus\")\n",
|
166 |
"\n",
|
|
|
167 |
"# Point it to your files directory so that it can index the files and add them to the vectorstore\n",
|
168 |
-
"bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore
|
169 |
"\n",
|
170 |
"bot.ask(\"what was the first roster of the avengers?\")\n"
|
171 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
}
|
173 |
],
|
174 |
"metadata": {
|
|
|
7 |
"source": [
|
8 |
"# Examples\n",
|
9 |
"\n",
|
10 |
+
"Below you can find some examples of how to use the 🤖 `Megabots` library.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
]
|
12 |
},
|
13 |
{
|
|
|
24 |
},
|
25 |
{
|
26 |
"cell_type": "code",
|
27 |
+
"execution_count": 1,
|
28 |
"metadata": {},
|
29 |
"outputs": [
|
30 |
+
{
|
31 |
+
"name": "stderr",
|
32 |
+
"output_type": "stream",
|
33 |
+
"text": [
|
34 |
+
"/Users/momegas/Desktop/qnabot/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
35 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
36 |
+
]
|
37 |
+
},
|
38 |
{
|
39 |
"name": "stdout",
|
40 |
"output_type": "stream",
|
|
|
49 |
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
50 |
]
|
51 |
},
|
52 |
+
"execution_count": 1,
|
53 |
"metadata": {},
|
54 |
"output_type": "execute_result"
|
55 |
}
|
56 |
],
|
57 |
"source": [
|
58 |
+
"from megabots import bot\n",
|
59 |
+
"\n",
|
60 |
"qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\")\n",
|
61 |
+
"\n",
|
62 |
+
"qnabot.ask(\"what was the first roster of the avengers?\")\n"
|
63 |
]
|
64 |
},
|
65 |
{
|
|
|
69 |
"source": [
|
70 |
"### Changing the bot's prompt\n",
|
71 |
"\n",
|
72 |
+
"You can change the bot's prompt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knowledge searched from the index) and the `question` (the human question).\n"
|
73 |
]
|
74 |
},
|
75 |
{
|
76 |
"cell_type": "code",
|
77 |
+
"execution_count": 2,
|
78 |
"metadata": {},
|
79 |
"outputs": [
|
80 |
{
|
|
|
88 |
{
|
89 |
"data": {
|
90 |
"text/plain": [
|
91 |
+
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
92 |
]
|
93 |
},
|
94 |
+
"execution_count": 2,
|
95 |
"metadata": {},
|
96 |
"output_type": "execute_result"
|
97 |
}
|
98 |
],
|
99 |
"source": [
|
100 |
+
"from megabots import bot\n",
|
101 |
+
"\n",
|
102 |
+
"prompt = \"\"\"\n",
|
103 |
"Use the following pieces of context to answer the question at the end. \n",
|
104 |
"If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
|
105 |
+
"Answer in the style of Tony Stark.\n",
|
106 |
+
"\n",
|
107 |
"{context}\n",
|
108 |
"\n",
|
109 |
"Question: {question}\n",
|
110 |
"Helpful humorous answer:\"\"\"\n",
|
111 |
"\n",
|
112 |
+
"qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\", prompt=prompt)\n",
|
113 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
"qnabot.ask(\"what was the first roster of the avengers?\")\n"
|
115 |
]
|
116 |
},
|
|
|
125 |
"\n",
|
126 |
"In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it.\n",
|
127 |
"The command is:\n",
|
128 |
+
"\n",
|
129 |
"```bash\n",
|
130 |
"wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py\n",
|
131 |
"```\n",
|
132 |
+
"\n",
|
133 |
+
"You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus\n"
|
134 |
]
|
135 |
},
|
136 |
{
|
137 |
"cell_type": "code",
|
138 |
+
"execution_count": 3,
|
139 |
"metadata": {},
|
140 |
"outputs": [
|
141 |
{
|
|
|
151 |
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
152 |
]
|
153 |
},
|
154 |
+
"execution_count": 3,
|
155 |
"metadata": {},
|
156 |
"output_type": "execute_result"
|
157 |
}
|
158 |
],
|
159 |
"source": [
|
160 |
+
"from megabots import bot\n",
|
|
|
|
|
|
|
161 |
"\n",
|
162 |
+
"# Attach a vectorstore by passing the name of the database. Default port for milvus is 19530 and default host is localhost\n",
|
163 |
"# Point it to your files directory so that it can index the files and add them to the vectorstore\n",
|
164 |
+
"bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore=\"milvus\")\n",
|
165 |
"\n",
|
166 |
"bot.ask(\"what was the first roster of the avengers?\")\n"
|
167 |
]
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"attachments": {},
|
171 |
+
"cell_type": "markdown",
|
172 |
+
"metadata": {},
|
173 |
+
"source": [
|
174 |
+
"Or use the `vectorstore` factory function for more customisation\n"
|
175 |
+
]
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"cell_type": "code",
|
179 |
+
"execution_count": 4,
|
180 |
+
"metadata": {},
|
181 |
+
"outputs": [
|
182 |
+
{
|
183 |
+
"name": "stdout",
|
184 |
+
"output_type": "stream",
|
185 |
+
"text": [
|
186 |
+
"Using model: gpt-3.5-turbo\n"
|
187 |
+
]
|
188 |
+
}
|
189 |
+
],
|
190 |
+
"source": [
|
191 |
+
"from megabots import bot, vectorstore\n",
|
192 |
+
"\n",
|
193 |
+
"milvus = vectorstore(\"milvus\", host=\"localhost\", port=19530)\n",
|
194 |
+
"\n",
|
195 |
+
"bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore=milvus)\n"
|
196 |
+
]
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"attachments": {},
|
200 |
+
"cell_type": "markdown",
|
201 |
+
"metadata": {},
|
202 |
+
"source": [
|
203 |
+
"### Working with memory\n",
|
204 |
+
"\n",
|
205 |
+
"You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used, which is then configured with sane defaults.\n",
|
206 |
+
"Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need.\n"
|
207 |
+
]
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"cell_type": "code",
|
211 |
+
"execution_count": 5,
|
212 |
+
"metadata": {},
|
213 |
+
"outputs": [
|
214 |
+
{
|
215 |
+
"name": "stdout",
|
216 |
+
"output_type": "stream",
|
217 |
+
"text": [
|
218 |
+
"Using model: gpt-3.5-turbo\n",
|
219 |
+
"Loading path from pickle file: ./index.pkl ...\n",
|
220 |
+
"Iron Man is a superhero character who is a member of the Avengers. He is known for his high-tech suit of armor and his alter ego, Tony Stark.\n",
|
221 |
+
"Yes, Iron Man was part of the original Avengers lineup.\n"
|
222 |
+
]
|
223 |
+
}
|
224 |
+
],
|
225 |
+
"source": [
|
226 |
+
"from megabots import bot\n",
|
227 |
+
"\n",
|
228 |
+
"qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\", memory=\"conversation-buffer\")\n",
|
229 |
+
"\n",
|
230 |
+
"print(qnabot.ask(\"who is iron man?\"))\n",
|
231 |
+
"print(qnabot.ask(\"was he in the first roster?\"))\n"
|
232 |
+
]
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"attachments": {},
|
236 |
+
"cell_type": "markdown",
|
237 |
+
"metadata": {},
|
238 |
+
"source": [
|
239 |
+
"Or using the `memory` factory function"
|
240 |
+
]
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"cell_type": "code",
|
244 |
+
"execution_count": 6,
|
245 |
+
"metadata": {},
|
246 |
+
"outputs": [
|
247 |
+
{
|
248 |
+
"name": "stdout",
|
249 |
+
"output_type": "stream",
|
250 |
+
"text": [
|
251 |
+
"Using model: gpt-3.5-turbo\n",
|
252 |
+
"Loading path from pickle file: ./index.pkl ...\n",
|
253 |
+
"Iron Man is a superhero character who is a member of the Avengers. He is known for his high-tech suit of armor and his alter ego, Tony Stark.\n",
|
254 |
+
"Yes, Iron Man was part of the original Avengers lineup.\n"
|
255 |
+
]
|
256 |
+
}
|
257 |
+
],
|
258 |
+
"source": [
|
259 |
+
"from megabots import bot, memory\n",
|
260 |
+
"\n",
|
261 |
+
"qnabot = bot(\n",
|
262 |
+
" \"qna-over-docs\",\n",
|
263 |
+
" index=\"./index.pkl\",\n",
|
264 |
+
" memory=memory(\"conversation-buffer-window\", k=5),\n",
|
265 |
+
")\n",
|
266 |
+
"\n",
|
267 |
+
"print(qnabot.ask(\"who is iron man?\"))\n",
|
268 |
+
"print(qnabot.ask(\"was he in the first roster?\"))\n"
|
269 |
+
]
|
270 |
+
},
|
271 |
+
{
|
272 |
+
"attachments": {},
|
273 |
+
"cell_type": "markdown",
|
274 |
+
"metadata": {},
|
275 |
+
"source": [
|
276 |
+
"NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to facilitate for chat history. The variable name is `history`.\n"
|
277 |
+
]
|
278 |
+
},
|
279 |
+
{
|
280 |
+
"cell_type": "code",
|
281 |
+
"execution_count": 7,
|
282 |
+
"metadata": {},
|
283 |
+
"outputs": [
|
284 |
+
{
|
285 |
+
"name": "stdout",
|
286 |
+
"output_type": "stream",
|
287 |
+
"text": [
|
288 |
+
"Using model: gpt-3.5-turbo\n",
|
289 |
+
"Loading path from pickle file: ./index.pkl ...\n",
|
290 |
+
"Iron Man is a superhero character who is a member of the Avengers. He is a wealthy businessman named Tony Stark who uses his advanced technology to create a suit of armor that gives him superhuman abilities.\n",
|
291 |
+
"Yes, Iron Man was part of the original Avengers lineup.\n"
|
292 |
+
]
|
293 |
+
}
|
294 |
+
],
|
295 |
+
"source": [
|
296 |
+
"from megabots import bot\n",
|
297 |
+
"\n",
|
298 |
+
"prompt = \"\"\"\n",
|
299 |
+
"Use the following pieces of context to answer the question at the end. \n",
|
300 |
+
"If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
|
301 |
+
"\n",
|
302 |
+
"{context}\n",
|
303 |
+
"\n",
|
304 |
+
"{history}\n",
|
305 |
+
"Human: {question}\n",
|
306 |
+
"AI:\"\"\"\n",
|
307 |
+
"\n",
|
308 |
+
"qnabot = bot(\n",
|
309 |
+
" \"qna-over-docs\",\n",
|
310 |
+
" prompt=prompt,\n",
|
311 |
+
" index=\"./index.pkl\",\n",
|
312 |
+
" memory=\"conversation-buffer\",\n",
|
313 |
+
")\n",
|
314 |
+
"\n",
|
315 |
+
"print(qnabot.ask(\"who is iron man?\"))\n",
|
316 |
+
"print(qnabot.ask(\"was he in the first roster?\"))"
|
317 |
+
]
|
318 |
}
|
319 |
],
|
320 |
"metadata": {
|
megabots/__init__.py
CHANGED
@@ -1,225 +1,10 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
from
|
4 |
-
from
|
5 |
-
from
|
6 |
-
from langchain.vectorstores.faiss import FAISS
|
7 |
-
import gradio as gr
|
8 |
-
from fastapi import FastAPI
|
9 |
-
import pickle
|
10 |
-
import os
|
11 |
-
from dotenv import load_dotenv
|
12 |
-
from langchain.prompts import PromptTemplate
|
13 |
-
from langchain.chains.question_answering import load_qa_chain
|
14 |
-
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
|
15 |
-
from langchain.document_loaders import DirectoryLoader
|
16 |
-
from megabots.vectorstores import VectorStore, vectorstore
|
17 |
-
|
18 |
-
load_dotenv()
|
19 |
-
|
20 |
-
|
21 |
-
class Bot:
|
22 |
-
def __init__(
|
23 |
-
self,
|
24 |
-
model: str | None = None,
|
25 |
-
prompt_template: str | None = None,
|
26 |
-
prompt_variables: list[str] | None = None,
|
27 |
-
index: str | None = None,
|
28 |
-
sources: bool | None = False,
|
29 |
-
vectorstore: VectorStore | None = None,
|
30 |
-
memory: str | None = None,
|
31 |
-
verbose: bool = False,
|
32 |
-
temperature: int = 0,
|
33 |
-
):
|
34 |
-
self.select_model(model, temperature)
|
35 |
-
self.create_loader(index)
|
36 |
-
self.load_or_create_index(index, vectorstore)
|
37 |
-
|
38 |
-
# Load the question-answering chain for the selected model
|
39 |
-
self.chain = self.create_chain(
|
40 |
-
prompt_template, prompt_variables, sources=sources, verbose=verbose
|
41 |
-
)
|
42 |
-
|
43 |
-
def create_chain(
|
44 |
-
self,
|
45 |
-
prompt_template: str | None = None,
|
46 |
-
prompt_variables: list[str] | None = None,
|
47 |
-
sources: bool | None = False,
|
48 |
-
verbose: bool = False,
|
49 |
-
):
|
50 |
-
prompt = (
|
51 |
-
PromptTemplate(template=prompt_template, input_variables=prompt_variables)
|
52 |
-
if prompt_template is not None and prompt_variables is not None
|
53 |
-
else QA_PROMPT
|
54 |
-
)
|
55 |
-
# TODO: Changing the prompt here is not working. Leave it as is for now.
|
56 |
-
# Reference: https://github.com/hwchase17/langchain/issues/2858
|
57 |
-
if sources:
|
58 |
-
return load_qa_with_sources_chain(
|
59 |
-
self.llm, chain_type="stuff", verbose=verbose
|
60 |
-
)
|
61 |
-
return load_qa_chain(
|
62 |
-
self.llm, chain_type="stuff", verbose=verbose, prompt=prompt
|
63 |
-
)
|
64 |
-
|
65 |
-
def select_model(self, model: str | None, temperature: float):
|
66 |
-
# Select and set the appropriate model based on the provided input
|
67 |
-
if model is None or model == "gpt-3.5-turbo":
|
68 |
-
print("Using model: gpt-3.5-turbo")
|
69 |
-
self.llm = ChatOpenAI(temperature=temperature)
|
70 |
-
|
71 |
-
if model == "text-davinci-003":
|
72 |
-
print("Using model: text-davinci-003")
|
73 |
-
self.llm = OpenAI(temperature=temperature)
|
74 |
-
|
75 |
-
def create_loader(self, index: str | None):
|
76 |
-
# Create a loader based on the provided directory (either local or S3)
|
77 |
-
if index is None:
|
78 |
-
raise RuntimeError(
|
79 |
-
"""
|
80 |
-
Impossible to find a valid index.
|
81 |
-
Either provide a valid path to a pickle file or a directory.
|
82 |
-
"""
|
83 |
-
)
|
84 |
-
self.loader = DirectoryLoader(index, recursive=True)
|
85 |
-
|
86 |
-
def load_or_create_index(self, index: str, vectorstore: VectorStore | None = None):
|
87 |
-
# Load an existing index from disk or create a new one if not available
|
88 |
-
if vectorstore is not None:
|
89 |
-
self.search_index = vectorstore.client.from_documents(
|
90 |
-
self.loader.load_and_split(),
|
91 |
-
OpenAIEmbeddings(),
|
92 |
-
connection_args={"host": vectorstore.host, "port": vectorstore.port},
|
93 |
-
)
|
94 |
-
return
|
95 |
-
|
96 |
-
# Is pickle
|
97 |
-
if index is not None and "pkl" in index or "pickle" in index:
|
98 |
-
print("Loading path from pickle file: ", index, "...")
|
99 |
-
with open(index, "rb") as f:
|
100 |
-
self.search_index = pickle.load(f)
|
101 |
-
return
|
102 |
-
|
103 |
-
# Is directory
|
104 |
-
if index is not None and os.path.isdir(index):
|
105 |
-
print("Creating index...")
|
106 |
-
self.search_index = FAISS.from_documents(
|
107 |
-
self.loader.load_and_split(), OpenAIEmbeddings()
|
108 |
-
)
|
109 |
-
return
|
110 |
-
|
111 |
-
raise RuntimeError(
|
112 |
-
"""
|
113 |
-
Impossible to find a valid index.
|
114 |
-
Either provide a valid path to a pickle file or a directory.
|
115 |
-
"""
|
116 |
-
)
|
117 |
-
|
118 |
-
def save_index(self, index_path: str):
|
119 |
-
# Save the index to the specified path
|
120 |
-
with open(index_path, "wb") as f:
|
121 |
-
pickle.dump(self.search_index, f)
|
122 |
-
|
123 |
-
def ask(self, question: str, k=1) -> str:
|
124 |
-
# Retrieve the answer to the given question and return it
|
125 |
-
input_documents = self.search_index.similarity_search(question, k=k)
|
126 |
-
answer = self.chain.run(input_documents=input_documents, question=question)
|
127 |
-
return answer
|
128 |
-
|
129 |
-
|
130 |
-
SUPPORTED_TASKS = {
|
131 |
-
"qna-over-docs": {
|
132 |
-
"impl": Bot,
|
133 |
-
"default": {
|
134 |
-
"model": "gpt-3.5-turbo",
|
135 |
-
"temperature": 0,
|
136 |
-
"index": "./index",
|
137 |
-
},
|
138 |
-
}
|
139 |
-
}
|
140 |
|
141 |
-
SUPPORTED_MODELS = {}
|
142 |
|
|
|
143 |
|
144 |
-
|
145 |
-
task: str | None = None,
|
146 |
-
model: str | None = None,
|
147 |
-
index: str | None = None,
|
148 |
-
prompt_template: str | None = None,
|
149 |
-
prompt_variables: list[str] | None = None,
|
150 |
-
verbose: bool = False,
|
151 |
-
temperature: int = 0,
|
152 |
-
**kwargs,
|
153 |
-
) -> Bot:
|
154 |
-
"""Instanciate a bot based on the provided task. Each supported tasks has it's own default sane defaults.
|
155 |
-
|
156 |
-
Args:
|
157 |
-
task (str | None, optional): The given task. Can be one of the SUPPORTED_TASKS.
|
158 |
-
model (str | None, optional): Model to be used. Can be one of the SUPPORTED_MODELS.
|
159 |
-
index (str | None, optional): Data that the model will load and store index info.
|
160 |
-
Can be either a local file path, a pickle file, or a url of a vector database.
|
161 |
-
By default it will look for a local directory called "files" in the current working directory.
|
162 |
-
prompt_template (str | None, optional): Prompt template to be used. Specify variables with {var_name}.
|
163 |
-
prompt_variables (list[str] | None, optional): Prompt variables to be used in the prompt template.
|
164 |
-
verbose (bool, optional): Verbocity. Defaults to False.
|
165 |
-
temperature (int, optional): Temperature. Defaults to 0.
|
166 |
-
|
167 |
-
Raises:
|
168 |
-
RuntimeError: _description_
|
169 |
-
ValueError: _description_
|
170 |
-
|
171 |
-
Returns:
|
172 |
-
Bot: Bot instance
|
173 |
-
"""
|
174 |
-
|
175 |
-
if task is None:
|
176 |
-
raise RuntimeError("Impossible to instantiate a bot without a task.")
|
177 |
-
if task not in SUPPORTED_TASKS:
|
178 |
-
raise ValueError(f"Task {task} is not supported.")
|
179 |
-
|
180 |
-
task_defaults = SUPPORTED_TASKS[task]["default"]
|
181 |
-
return SUPPORTED_TASKS[task]["impl"](
|
182 |
-
model=model or task_defaults["model"],
|
183 |
-
index=index or task_defaults["index"],
|
184 |
-
prompt_template=prompt_template,
|
185 |
-
prompt_variables=prompt_variables,
|
186 |
-
temperature=temperature,
|
187 |
-
verbose=verbose,
|
188 |
-
**kwargs,
|
189 |
-
)
|
190 |
-
|
191 |
-
|
192 |
-
def create_api(bot: Bot):
|
193 |
-
app = FastAPI()
|
194 |
-
|
195 |
-
@app.get("/v1/ask/{question}")
|
196 |
-
async def ask(question: str):
|
197 |
-
answer = bot.ask(question)
|
198 |
-
return {"answer": answer}
|
199 |
-
|
200 |
-
return app
|
201 |
-
|
202 |
-
|
203 |
-
def create_interface(bot_instance: Bot, examples: list[list[str]] = []):
|
204 |
-
with gr.Blocks() as interface:
|
205 |
-
chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
|
206 |
-
msg = gr.Textbox(
|
207 |
-
show_label=False,
|
208 |
-
placeholder="Enter text and press enter, or upload an image",
|
209 |
-
).style(container=False)
|
210 |
-
clear = gr.Button("Clear")
|
211 |
-
|
212 |
-
def user(user_message, history):
|
213 |
-
return "", history + [[user_message, None]]
|
214 |
-
|
215 |
-
def bot(history):
|
216 |
-
response = bot_instance.ask(history[-1][0])
|
217 |
-
history[-1][1] = response
|
218 |
-
return history
|
219 |
-
|
220 |
-
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
221 |
-
bot, chatbot, chatbot
|
222 |
-
)
|
223 |
-
clear.click(lambda: None, None, chatbot, queue=False)
|
224 |
-
|
225 |
-
return interface
|
|
|
1 |
+
from megabots.vectorstore import VectorStore, vectorstore
|
2 |
+
from megabots.memory import Memory, memory
|
3 |
+
from megabots.bot import Bot, bot
|
4 |
+
from megabots.prompt import prompt
|
5 |
+
from megabots.utils import create_api, create_interface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
|
|
7 |
|
8 |
+
from dotenv import load_dotenv
|
9 |
|
10 |
+
load_dotenv()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
megabots/bot.py
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any
|
2 |
+
from langchain.llms import OpenAI
|
3 |
+
from langchain.chat_models import ChatOpenAI
|
4 |
+
from langchain.embeddings import OpenAIEmbeddings
|
5 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
6 |
+
from langchain.vectorstores.faiss import FAISS
|
7 |
+
import pickle
|
8 |
+
import os
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain.chains.question_answering import load_qa_chain
|
11 |
+
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
|
12 |
+
from langchain.document_loaders import DirectoryLoader
|
13 |
+
from megabots.prompt import QA_MEMORY_PROMPT
|
14 |
+
from megabots.vectorstore import VectorStore
|
15 |
+
from megabots.memory import Memory
|
16 |
+
import megabots
|
17 |
+
|
18 |
+
|
19 |
+
class Bot:
    """Question-answering bot over a document index.

    On construction the bot selects an LLM, builds a directory loader for
    ``index``, loads or creates a search index and prepares a "stuff" QA chain.
    ``ask`` then retrieves the most similar documents for a question and runs
    them through that chain.
    """

    def __init__(
        self,
        model: str | None = None,
        prompt: PromptTemplate | None = None,
        index: str | None = None,
        sources: bool | None = False,
        vectorstore: VectorStore | None = None,
        memory: Memory | None = None,
        verbose: bool = False,
        temperature: int = 0,
    ):
        """Build the bot.

        Args:
            model: Model name; ``None`` or "gpt-3.5-turbo" selects the chat
                model, "text-davinci-003" the completion model.
            prompt: Prompt to use. When omitted, a default is picked depending
                on whether ``memory`` is set.
            index: Path to a directory of documents or to a pickled index.
            sources: When truthy, use the QA-with-sources chain.
            vectorstore: Optional vector store wrapper used to build the index.
            memory: Optional memory wrapper exposing a ``memory`` attribute.
            verbose: Passed through to the chain.
            temperature: Sampling temperature passed to the model.

        Raises:
            RuntimeError: If no usable index can be found.
            ValueError: If ``model`` is not a supported model name.
        """
        self.vectorstore = vectorstore
        self.memory = memory
        # A caller-supplied prompt always wins; the memory-dependent choice is
        # only the fallback. (Unparenthesised `a or b if c else d` parses as
        # `(a or b) if c else d`, which dropped the caller's prompt.)
        if prompt is not None:
            self.prompt = prompt
        else:
            self.prompt = QA_MEMORY_PROMPT if self.memory else QA_PROMPT
        self.select_model(model, temperature)
        self.create_loader(index)
        self.load_or_create_index(index, vectorstore)

        # Load the question-answering chain for the selected model
        self.chain = self.create_chain(sources=sources, verbose=verbose)

    def create_chain(
        self,
        sources: bool | None = False,
        verbose: bool = False,
    ):
        """Return the QA chain, with or without sources, for ``self.llm``."""
        chain_memory = self.memory.memory if self.memory else None

        # TODO: Changing the prompt here is not working. Leave it as is for now.
        # Reference: https://github.com/hwchase17/langchain/issues/2858
        if sources:
            return load_qa_with_sources_chain(
                self.llm,
                chain_type="stuff",
                memory=chain_memory,
                verbose=verbose,
            )
        return load_qa_chain(
            self.llm,
            chain_type="stuff",
            verbose=verbose,
            prompt=self.prompt,
            memory=chain_memory,
        )

    def select_model(self, model: str | None, temperature: float):
        """Set ``self.llm`` for the requested model name.

        Raises:
            ValueError: If ``model`` is neither ``None`` nor a supported name.
        """
        if model is None or model == "gpt-3.5-turbo":
            print("Using model: gpt-3.5-turbo")
            self.llm = ChatOpenAI(temperature=temperature)
        elif model == "text-davinci-003":
            print("Using model: text-davinci-003")
            self.llm = OpenAI(temperature=temperature)
        else:
            # Fail here rather than later with an AttributeError on self.llm.
            raise ValueError(f"Model {model} is not supported.")

    def create_loader(self, index: str | None):
        """Create a recursive directory loader rooted at ``index``.

        Raises:
            RuntimeError: If ``index`` is ``None``.
        """
        if index is None:
            raise RuntimeError(
                """
                Impossible to find a valid index.
                Either provide a valid path to a pickle file or a directory.
                """
            )
        self.loader = DirectoryLoader(index, recursive=True)

    def load_or_create_index(
        self, index: str | None, vectorstore: VectorStore | None = None
    ):
        """Populate ``self.search_index``.

        Uses the vector store when one is given, otherwise loads a pickled
        index or builds a FAISS index from the documents in a directory.

        Raises:
            RuntimeError: If ``index`` is neither a pickle path nor a directory.
        """
        if vectorstore is not None:
            self.search_index = vectorstore.client.from_documents(
                self.loader.load_and_split(),
                OpenAIEmbeddings(),
                connection_args={"host": vectorstore.host, "port": vectorstore.port},
            )
            return

        # Is pickle (the None check must guard both substring tests)
        if index is not None and ("pkl" in index or "pickle" in index):
            print("Loading path from pickle file: ", index, "...")
            # NOTE(security): pickle.load executes arbitrary code from the
            # file; only load index files from a trusted source.
            with open(index, "rb") as f:
                self.search_index = pickle.load(f)
            return

        # Is directory
        if index is not None and os.path.isdir(index):
            print("Creating index...")
            self.search_index = FAISS.from_documents(
                self.loader.load_and_split(), OpenAIEmbeddings()
            )
            return

        raise RuntimeError(
            """
            Impossible to find a valid index.
            Either provide a valid path to a pickle file or a directory.
            """
        )

    def save_index(self, index_path: str):
        """Pickle ``self.search_index`` to ``index_path``."""
        with open(index_path, "wb") as f:
            pickle.dump(self.search_index, f)

    def ask(self, question: str, k=1) -> str:
        """Answer ``question`` using the ``k`` most similar indexed documents."""
        input_documents = self.search_index.similarity_search(question, k=k)
        answer = self.chain.run(input_documents=input_documents, question=question)
        return answer
|
126 |
+
|
127 |
+
|
128 |
+
# Default settings applied to the "qna-over-docs" task when the caller does
# not override them.
_QNA_OVER_DOCS_DEFAULTS = {
    "model": "gpt-3.5-turbo",
    "temperature": 0,
    "index": "./index",
    "input_variables": ["context", "question"],
}

# Registry mapping a task name to its implementation class and defaults.
SUPPORTED_TASKS = {
    "qna-over-docs": {
        "impl": Bot,
        "default": _QNA_OVER_DOCS_DEFAULTS,
    },
}

# Registry of supported models; currently empty.
SUPPORTED_MODELS = dict()
|
141 |
+
|
142 |
+
|
143 |
+
def bot(
    task: str | None = None,
    *,
    model: str | None = None,
    index: str | None = None,
    prompt: str | None = None,
    memory: str | Memory | None = None,
    vectorstore: str | VectorStore | None = None,
    verbose: bool = False,
    temperature: int = 0,
) -> Bot:
    """Instantiate a bot for the given task, filling in the task's defaults.

    Args:
        task (str | None, optional): The given task. Must be one of the SUPPORTED_TASKS.

        model (str | None, optional): Model to be used. Defaults to the task's default model.

        index (str | None, optional): Where the bot loads its index from.
            Defaults to the task's default index path.

        prompt (str | None, optional): The prompt template text. Mark variables like this: {variable}.
            Variables are context, question, and history if the bot has memory.

        memory (str | Memory | None, optional): The memory the bot will use.
            If a string is passed, it is resolved with megabots.memory.

        vectorstore (str | VectorStore | None, optional): The vectorstore the bot will use.
            If a string is passed, it is resolved with megabots.vectorstore.

        verbose (bool, optional): Verbosity. Defaults to False.

        temperature (int, optional): Temperature. Defaults to 0.

    Raises:
        RuntimeError: If no task is given.
        ValueError: If the task is not one of the SUPPORTED_TASKS.

    Returns:
        Bot: Bot instance
    """

    if task is None:
        raise RuntimeError("Impossible to instantiate a bot without a task.")
    if task not in SUPPORTED_TASKS:
        raise ValueError(f"Task {task} is not supported.")

    task_defaults = SUPPORTED_TASKS[task]["default"]

    # Work on a copy: appending to the registry's own list would leak
    # "history" into every later call (and duplicate it on each call).
    input_variables = list(task_defaults["input_variables"])
    if memory is not None:
        input_variables.append("history")

    prompt_template = None
    if prompt is not None:
        prompt_template = PromptTemplate(
            template=prompt, input_variables=input_variables
        )

    # Names are resolved to instances; ready-made instances pass through.
    if isinstance(vectorstore, str):
        vectorstore = megabots.vectorstore(vectorstore)
    if isinstance(memory, str):
        memory = megabots.memory(memory)

    return SUPPORTED_TASKS[task]["impl"](
        model=model or task_defaults["model"],
        index=index or task_defaults["index"],
        prompt=prompt_template,
        temperature=temperature,
        verbose=verbose,
        vectorstore=vectorstore,
        memory=memory,
    )
|
megabots/memory.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
|
2 |
+
|
3 |
+
|
4 |
+
class ConversationBuffer:
    """Wrapper holding a ConversationBufferMemory keyed on "question"."""

    def __init__(self):
        buffer_memory = ConversationBufferMemory(input_key="question")
        self.memory = buffer_memory
|
7 |
+
|
8 |
+
|
9 |
+
class ConversationBufferWindow:
    """Wrapper holding a ConversationBufferWindowMemory keyed on "question"."""

    def __init__(self, k: int):
        # Keep the window size on the wrapper as well as on the memory object.
        self.k: int = k
        window_memory = ConversationBufferWindowMemory(
            k=self.k,
            input_key="question",
        )
        self.memory = window_memory
|
13 |
+
|
14 |
+
|
15 |
+
# Registry mapping a memory name to its wrapper class and default arguments.
_CONVERSATION_BUFFER = {"impl": ConversationBuffer, "default": {}}
_CONVERSATION_BUFFER_WINDOW = {"impl": ConversationBufferWindow, "default": {"k": 3}}

SUPPORTED_MEMORY = {
    "conversation-buffer": _CONVERSATION_BUFFER,
    "conversation-buffer-window": _CONVERSATION_BUFFER_WINDOW,
}


# Dynamically created class deriving from both wrappers.
# NOTE(review): presumably meant as a common type for annotations - confirm.
Memory = type(
    "Memory",
    (ConversationBuffer, ConversationBufferWindow),
    {},
)
|
28 |
+
|
29 |
+
|
30 |
+
def memory(
    name: str = "conversation-buffer-window",
    k: int | None = None,
) -> Memory:
    """Return a memory wrapper for the given name.

    Args:
        name: One of the SUPPORTED_MEMORY names.
        k: Window size. Only used by memories whose defaults define ``k``;
            when omitted, the registered default is used.

    Raises:
        RuntimeError: If ``name`` is None.
        ValueError: If ``name`` is not one of the SUPPORTED_MEMORY names.
    """
    if name is None:
        raise RuntimeError("Impossible to instantiate memory without a name.")

    if name not in SUPPORTED_MEMORY:
        raise ValueError(f"Memory {name} is not supported.")

    entry = SUPPORTED_MEMORY[name]

    # Start from a copy of the registered defaults so an omitted ``k`` falls
    # back to the default instead of being passed through as None.
    options = dict(entry["default"])
    if k is not None and "k" in options:
        options["k"] = k

    return entry["impl"](**options)
|
megabots/prompt.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
from langchain import PromptTemplate
|
3 |
+
|
4 |
+
# Prompt text for question answering with conversation history. The template
# expects the variables {context}, {history} and {question}.
QNA_TEMPLATE = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "{history}\n"
    "Human: {question}\n"
    "AI:"
)

# Ready-made prompt built from QNA_TEMPLATE.
QA_MEMORY_PROMPT = PromptTemplate(
    input_variables=["context", "history", "question"],
    template=QNA_TEMPLATE,
)
|
15 |
+
|
16 |
+
|
17 |
+
def prompt(template: str, variables: List[str]):
    """Build a PromptTemplate from template text and its variable names."""
    built = PromptTemplate(input_variables=variables, template=template)
    return built
|
megabots/utils.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from megabots.bot import Bot
|
4 |
+
|
5 |
+
|
6 |
+
def create_api(bot: Bot):
    """Return a FastAPI app exposing the bot at GET /v1/ask/{question}."""
    app = FastAPI()

    async def ask(question: str):
        # The response body carries the bot's answer under the "answer" key.
        return {"answer": bot.ask(question)}

    # Register the handler explicitly instead of via decorator syntax.
    app.get("/v1/ask/{question}")(ask)

    return app
|
15 |
+
|
16 |
+
|
17 |
+
def create_interface(bot_instance: Bot, examples: list[list[str]] | None = None):
    """Return a Gradio Blocks chat interface backed by ``bot_instance``.

    Args:
        bot_instance: Bot whose ``ask`` method answers each submitted message.
        examples: Currently unused by this function. Defaults to None rather
            than a shared mutable list.

    Returns:
        The Gradio Blocks interface.
    """
    with gr.Blocks() as interface:
        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
        msg = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter, or upload an image",
        ).style(container=False)
        clear = gr.Button("Clear")

        def user(user_message, history):
            # Clear the textbox and append the message with a pending answer.
            return "", history + [[user_message, None]]

        def bot(history):
            # Answer the latest user message and fill in its pending slot.
            response = bot_instance.ask(history[-1][0])
            history[-1][1] = response
            return history

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)

    return interface
|
megabots/{vectorstores.py β vectorstore.py}
RENAMED
@@ -1,5 +1,5 @@
|
|
1 |
from typing import Type, TypeVar
|
2 |
-
from langchain.vectorstores import Milvus
|
3 |
from abc import ABC
|
4 |
|
5 |
|
@@ -26,7 +26,9 @@ SUPPORTED_VECTORSTORES = {
|
|
26 |
}
|
27 |
|
28 |
|
29 |
-
def vectorstore(
|
|
|
|
|
30 |
"""Return a vectorstore object."""
|
31 |
|
32 |
if name is None:
|
@@ -36,6 +38,6 @@ def vectorstore(name: str) -> VectorStore:
|
|
36 |
raise ValueError(f"Vectorstore {name} is not supported.")
|
37 |
|
38 |
return SUPPORTED_VECTORSTORES[name]["impl"](
|
39 |
-
host=SUPPORTED_VECTORSTORES[name]["default"]["host"],
|
40 |
-
port=SUPPORTED_VECTORSTORES[name]["default"]["port"],
|
41 |
)
|
|
|
1 |
from typing import Type, TypeVar
|
2 |
+
from langchain.vectorstores import Milvus
|
3 |
from abc import ABC
|
4 |
|
5 |
|
|
|
26 |
}
|
27 |
|
28 |
|
29 |
+
def vectorstore(
|
30 |
+
name: str, host: str | None = None, port: int | None = None
|
31 |
+
) -> VectorStore:
|
32 |
"""Return a vectorstore object."""
|
33 |
|
34 |
if name is None:
|
|
|
38 |
raise ValueError(f"Vectorstore {name} is not supported.")
|
39 |
|
40 |
return SUPPORTED_VECTORSTORES[name]["impl"](
|
41 |
+
host=host or SUPPORTED_VECTORSTORES[name]["default"]["host"],
|
42 |
+
port=port or SUPPORTED_VECTORSTORES[name]["default"]["port"],
|
43 |
)
|
setup.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from setuptools import setup, find_packages
|
2 |
|
3 |
-
VERSION = "0.0.
|
4 |
|
5 |
setup(
|
6 |
name="megabots",
|
|
|
1 |
from setuptools import setup, find_packages
|
2 |
|
3 |
+
VERSION = "0.0.11"
|
4 |
|
5 |
setup(
|
6 |
name="megabots",
|
tests/test_memory.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pytest import raises
|
2 |
+
from megabots import memory
|
3 |
+
from megabots.memory import ConversationBuffer, ConversationBufferWindow
|
4 |
+
|
5 |
+
|
6 |
+
def test_memory_conversation_buffer():
    """The "conversation-buffer" name yields a ConversationBuffer."""
    assert isinstance(memory(name="conversation-buffer"), ConversationBuffer)
|
9 |
+
|
10 |
+
|
11 |
+
def test_memory_conversation_buffer_window():
    """The "conversation-buffer-window" name yields a ConversationBufferWindow."""
    window = memory(name="conversation-buffer-window", k=10)
    is_window = isinstance(window, ConversationBufferWindow)
    assert is_window
|
14 |
+
|
15 |
+
|
16 |
+
def test_memory_unsupported_name():
    """An unknown memory name raises ValueError naming the offender."""
    expected = r"Memory invalid-name is not supported."
    with raises(ValueError, match=expected):
        memory(name="invalid-name")
|
19 |
+
|
20 |
+
|
21 |
+
def test_memory_no_name():
    """Passing no name raises RuntimeError."""
    expected = r"Impossible to instantiate memory without a name."
    with raises(RuntimeError, match=expected):
        memory(name=None)
|