VTechAI committed
Commit
8a41f4d
1 Parent(s): 14d00fc
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. Dockerfile +29 -0
  2. __init__.py +0 -0
  3. api/__init__.py +0 -0
  4. api/__pycache__/__init__.cpython-310.pyc +0 -0
  5. api/answer/__init__.py +0 -0
  6. api/answer/__pycache__/__init__.cpython-310.pyc +0 -0
  7. api/answer/__pycache__/routes.cpython-310.pyc +0 -0
  8. api/answer/routes.py +371 -0
  9. api/internal/__init__.py +0 -0
  10. api/internal/__pycache__/__init__.cpython-310.pyc +0 -0
  11. api/internal/__pycache__/routes.cpython-310.pyc +0 -0
  12. api/internal/routes.py +69 -0
  13. api/user/__init__.py +0 -0
  14. api/user/__pycache__/__init__.cpython-310.pyc +0 -0
  15. api/user/__pycache__/routes.cpython-310.pyc +0 -0
  16. api/user/__pycache__/tasks.cpython-310.pyc +0 -0
  17. api/user/routes.py +321 -0
  18. api/user/tasks.py +7 -0
  19. app.py +44 -0
  20. celery.py +9 -0
  21. celeryconfig.py +8 -0
  22. core/__init__.py +0 -0
  23. core/__pycache__/__init__.cpython-310.pyc +0 -0
  24. core/__pycache__/settings.cpython-310.pyc +0 -0
  25. core/settings.py +44 -0
  26. error.py +15 -0
  27. index.faiss +0 -0
  28. index.pkl +3 -0
  29. indexes/local/patil2016.pdf/index.faiss +0 -0
  30. indexes/local/patil2016.pdf/index.pkl +3 -0
  31. inputs/local/patil2016.pdf/patil2016.pdf +0 -0
  32. llm/__init__.py +0 -0
  33. llm/__pycache__/__init__.cpython-310.pyc +0 -0
  34. llm/__pycache__/anthropic.cpython-310.pyc +0 -0
  35. llm/__pycache__/base.cpython-310.pyc +0 -0
  36. llm/__pycache__/docsgpt_provider.cpython-310.pyc +0 -0
  37. llm/__pycache__/huggingface.cpython-310.pyc +0 -0
  38. llm/__pycache__/llama_cpp.cpython-310.pyc +0 -0
  39. llm/__pycache__/llm_creator.cpython-310.pyc +0 -0
  40. llm/__pycache__/openai.cpython-310.pyc +0 -0
  41. llm/__pycache__/sagemaker.cpython-310.pyc +0 -0
  42. llm/anthropic.py +40 -0
  43. llm/base.py +14 -0
  44. llm/docsgpt_provider.py +49 -0
  45. llm/huggingface.py +44 -0
  46. llm/llama_cpp.py +39 -0
  47. llm/llm_creator.py +26 -0
  48. llm/openai.py +60 -0
  49. llm/sagemaker.py +139 -0
  50. parser/__init__.py +1 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
+ FROM python:3.11-slim-bullseye as builder
+
+ # Tiktoken requires Rust toolchain, so build it in a separate stage
+ RUN apt-get update && apt-get install -y gcc curl
+ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
+ ENV PATH="/root/.cargo/bin:${PATH}"
+ RUN pip install --upgrade pip && pip install tiktoken==0.5.2
+ COPY requirements.txt .
+ RUN pip install -r requirements.txt
+ RUN apt-get install -y wget unzip
+ RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
+ RUN unzip mpnet-base-v2.zip -d model
+ RUN rm mpnet-base-v2.zip
+
+ FROM python:3.11-slim-bullseye
+
+ # Copy pre-built packages and binaries from builder stage
+ COPY --from=builder /usr/local/ /usr/local/
+
+ WORKDIR /app
+ COPY --from=builder /model /app/model
+
+ COPY . /app/application
+ ENV FLASK_APP=app.py
+ ENV FLASK_DEBUG=true
+
+ EXPOSE 7091
+
+ CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
__init__.py ADDED
File without changes
api/__init__.py ADDED
File without changes
api/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes).
api/answer/__init__.py ADDED
File without changes
api/answer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (161 Bytes).
api/answer/__pycache__/routes.cpython-310.pyc ADDED
Binary file (8.1 kB).
api/answer/routes.py ADDED
@@ -0,0 +1,371 @@
+ import asyncio
+ import os
+ from flask import Blueprint, request, Response
+ import json
+ import datetime
+ import logging
+ import traceback
+
+ from pymongo import MongoClient
+ from bson.objectid import ObjectId
+ from transformers import GPT2TokenizerFast
+
+ from application.core.settings import settings
+ from application.vectorstore.vector_creator import VectorCreator
+ from application.llm.llm_creator import LLMCreator
+ from application.error import bad_request
+
+ logger = logging.getLogger(__name__)
+
+ mongo = MongoClient(settings.MONGO_URI)
+ db = mongo["docsgpt"]
+ conversations_collection = db["conversations"]
+ vectors_collection = db["vectors"]
+ prompts_collection = db["prompts"]
+ answer = Blueprint('answer', __name__)
+
+ if settings.LLM_NAME == "gpt4":
+     gpt_model = 'gpt-4'
+ elif settings.LLM_NAME == "anthropic":
+     gpt_model = 'claude-2'
+ else:
+     gpt_model = 'gpt-3.5-turbo'
+
+ # load the prompts
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+ with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
+     chat_combine_template = f.read()
+
+ with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
+     chat_reduce_template = f.read()
+
+ with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
+     chat_combine_creative = f.read()
+
+ with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
+     chat_combine_strict = f.read()
+
+ api_key_set = settings.API_KEY is not None
+ embeddings_key_set = settings.EMBEDDINGS_KEY is not None
+
+
+ async def async_generate(chain, question, chat_history):
+     result = await chain.arun({"question": question, "chat_history": chat_history})
+     return result
+
+
+ def count_tokens(string):
+     tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
+     return len(tokenizer(string)['input_ids'])
+
+
+ def run_async_chain(chain, question, chat_history):
+     loop = asyncio.new_event_loop()
+     asyncio.set_event_loop(loop)
+     result = {}
+     try:
+         answer = loop.run_until_complete(async_generate(chain, question, chat_history))
+     finally:
+         loop.close()
+     result["answer"] = answer
+     return result
+
+
+ def get_vectorstore(data):
+     if "active_docs" in data:
+         if data["active_docs"].split("/")[0] == "default":
+             vectorstore = ""
+         elif data["active_docs"].split("/")[0] == "local":
+             vectorstore = "indexes/" + data["active_docs"]
+         else:
+             vectorstore = "vectors/" + data["active_docs"]
+         if data["active_docs"] == "default":
+             vectorstore = ""
+     else:
+         vectorstore = ""
+     vectorstore = os.path.join("application", vectorstore)
+     return vectorstore
+
+
+ def is_azure_configured():
+     return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
+
+
+ def complete_stream(question, docsearch, chat_history, api_key, prompt_id, conversation_id):
+     llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
+
+     if prompt_id == 'default':
+         prompt = chat_combine_template
+     elif prompt_id == 'creative':
+         prompt = chat_combine_creative
+     elif prompt_id == 'strict':
+         prompt = chat_combine_strict
+     else:
+         prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
+
+     docs = docsearch.search(question, k=2)
+     if settings.LLM_NAME == "llama.cpp":
+         docs = [docs[0]]
+     # join all page_content together with a newline
+     docs_together = "\n".join([doc.page_content for doc in docs])
+     p_chat_combine = prompt.replace("{summaries}", docs_together)
+     messages_combine = [{"role": "system", "content": p_chat_combine}]
+     source_log_docs = []
+     for doc in docs:
+         if doc.metadata:
+             source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
+         else:
+             source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
+
+     if len(chat_history) > 1:
+         tokens_current_history = 0
+         # count tokens in history
+         chat_history.reverse()
+         for i in chat_history:
+             if "prompt" in i and "response" in i:
+                 tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
+                 if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                     tokens_current_history += tokens_batch
+                     messages_combine.append({"role": "user", "content": i["prompt"]})
+                     messages_combine.append({"role": "system", "content": i["response"]})
+     messages_combine.append({"role": "user", "content": question})
+
+     response_full = ""
+     completion = llm.gen_stream(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
+                                 messages=messages_combine)
+     for line in completion:
+         data = json.dumps({"answer": str(line)})
+         response_full += str(line)
+         yield f"data: {data}\n\n"
+
+     # save conversation to database
+     if conversation_id is not None:
+         conversations_collection.update_one(
+             {"_id": ObjectId(conversation_id)},
+             {"$push": {"queries": {"prompt": question, "response": response_full, "sources": source_log_docs}}},
+         )
+
+     else:
+         # create new conversation
+         # generate summary
+         messages_summary = [{"role": "assistant", "content": "Summarise following conversation in no more than 3 "
+                                                               "words, respond ONLY with the summary, use the same "
+                                                               "language as the system \n\nUser: " + question + "\n\n" +
+                                                               "AI: " +
+                                                               response_full},
+                             {"role": "user", "content": "Summarise following conversation in no more than 3 words, "
+                                                         "respond ONLY with the summary, use the same language as the "
+                                                         "system"}]
+
+         completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
+                              messages=messages_summary, max_tokens=30)
+         conversation_id = conversations_collection.insert_one(
+             {"user": "local",
+              "date": datetime.datetime.utcnow(),
+              "name": completion,
+              "queries": [{"prompt": question, "response": response_full, "sources": source_log_docs}]}
+         ).inserted_id
+
+     # send data.type = "end" to indicate that the stream has ended as json
+     data = json.dumps({"type": "id", "id": str(conversation_id)})
+     yield f"data: {data}\n\n"
+     data = json.dumps({"type": "end"})
+     yield f"data: {data}\n\n"
+
+
+ @answer.route("/stream", methods=["POST"])
+ def stream():
+     data = request.get_json()
+     # get parameter from url question
+     question = data["question"]
+     history = data["history"]
+     # history to json object from string
+     history = json.loads(history)
+     conversation_id = data["conversation_id"]
+     if 'prompt_id' in data:
+         prompt_id = data["prompt_id"]
+     else:
+         prompt_id = 'default'
+
+     # check if active_docs is set
+
+     if not api_key_set:
+         api_key = data["api_key"]
+     else:
+         api_key = settings.API_KEY
+     if not embeddings_key_set:
+         embeddings_key = data["embeddings_key"]
+     else:
+         embeddings_key = settings.EMBEDDINGS_KEY
+     if "active_docs" in data:
+         vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
+     else:
+         vectorstore = ""
+     docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
+
+     return Response(
+         complete_stream(question, docsearch,
+                         chat_history=history, api_key=api_key,
+                         prompt_id=prompt_id,
+                         conversation_id=conversation_id), mimetype="text/event-stream"
+     )
+
+
+ @answer.route("/api/answer", methods=["POST"])
+ def api_answer():
+     data = request.get_json()
+     question = data["question"]
+     history = data["history"]
+     if "conversation_id" not in data:
+         conversation_id = None
+     else:
+         conversation_id = data["conversation_id"]
+     print("-" * 5)
+     if not api_key_set:
+         api_key = data["api_key"]
+     else:
+         api_key = settings.API_KEY
+     if not embeddings_key_set:
+         embeddings_key = data["embeddings_key"]
+     else:
+         embeddings_key = settings.EMBEDDINGS_KEY
+     if 'prompt_id' in data:
+         prompt_id = data["prompt_id"]
+     else:
+         prompt_id = 'default'
+
+     if prompt_id == 'default':
+         prompt = chat_combine_template
+     elif prompt_id == 'creative':
+         prompt = chat_combine_creative
+     elif prompt_id == 'strict':
+         prompt = chat_combine_strict
+     else:
+         prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
+
+     # use try and except to check for exception
+     try:
+         # check if the vectorstore is set
+         vectorstore = get_vectorstore(data)
+         # loading the index and the store and the prompt template
+         # Note if you have used other embeddings than OpenAI, you need to change the embeddings
+         docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
+
+         llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
+
+         docs = docsearch.search(question, k=2)
+         # join all page_content together with a newline
+         docs_together = "\n".join([doc.page_content for doc in docs])
+         p_chat_combine = prompt.replace("{summaries}", docs_together)
+         messages_combine = [{"role": "system", "content": p_chat_combine}]
+         source_log_docs = []
+         for doc in docs:
+             if doc.metadata:
+                 source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
+             else:
+                 source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
+
+         if len(history) > 1:
+             tokens_current_history = 0
+             # count tokens in history
+             history.reverse()
+             for i in history:
+                 if "prompt" in i and "response" in i:
+                     tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
+                     if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                         tokens_current_history += tokens_batch
+                         messages_combine.append({"role": "user", "content": i["prompt"]})
+                         messages_combine.append({"role": "system", "content": i["response"]})
+         messages_combine.append({"role": "user", "content": question})
+
+         completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
+                              messages=messages_combine)
+
+         result = {"answer": completion, "sources": source_log_docs}
+         logger.debug(result)
+
+         # generate conversationId
+         if conversation_id is not None:
+             conversations_collection.update_one(
+                 {"_id": ObjectId(conversation_id)},
+                 {"$push": {"queries": {"prompt": question,
+                                        "response": result["answer"], "sources": result['sources']}}},
+             )
+
+         else:
+             # create new conversation
+             # generate summary
+             messages_summary = [
+                 {"role": "assistant", "content": "Summarise following conversation in no more than 3 words, "
+                                                  "respond ONLY with the summary, use the same language as the system \n\n"
+                                                  "User: " + question + "\n\n" + "AI: " + result["answer"]},
+                 {"role": "user", "content": "Summarise following conversation in no more than 3 words, "
+                                             "respond ONLY with the summary, use the same language as the system"}
+             ]
+
+             completion = llm.gen(
+                 model=gpt_model,
+                 engine=settings.AZURE_DEPLOYMENT_NAME,
+                 messages=messages_summary,
+                 max_tokens=30
+             )
+             conversation_id = conversations_collection.insert_one(
+                 {"user": "local",
+                  "date": datetime.datetime.utcnow(),
+                  "name": completion,
+                  "queries": [{"prompt": question, "response": result["answer"], "sources": source_log_docs}]}
+             ).inserted_id
+
+         result["conversation_id"] = str(conversation_id)
+
+         # mock result
+         # result = {
+         #     "answer": "The answer is 42",
+         #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
+         # }
+         return result
+     except Exception as e:
+         # print whole traceback
+         traceback.print_exc()
+         print(str(e))
+         return bad_request(500, str(e))
+
+
+ @answer.route("/api/search", methods=["POST"])
+ def api_search():
+     data = request.get_json()
+     # get parameter from url question
+     question = data["question"]
+
+     if not embeddings_key_set:
+         embeddings_key = data["embeddings_key"]
+     else:
+         embeddings_key = settings.EMBEDDINGS_KEY
+     if "active_docs" in data:
+         vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
+     else:
+         vectorstore = ""
+     docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
+
+     docs = docsearch.search(question, k=2)
+
+     source_log_docs = []
+     for doc in docs:
+         if doc.metadata:
+             source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
+         else:
+             source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
+     #yield f"data:{data}\n\n"
+     return source_log_docs
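Note: a minimal client sketch for the routes above (not part of the commit; it assumes the backend is reachable on http://localhost:7091 and that API_KEY and EMBEDDINGS_KEY are set in settings, so no keys are sent in the request body):

    import json
    import requests

    BASE = "http://localhost:7091"  # port exposed by the Dockerfile above

    # /api/answer returns a JSON dict with "answer", "sources" and "conversation_id".
    resp = requests.post(f"{BASE}/api/answer", json={
        "question": "What is DocsGPT?",
        "history": [],              # api_answer() treats history as a list of dicts
        "conversation_id": None,
        "active_docs": "default",
    })
    print(resp.json()["answer"])

    # /stream emits Server-Sent Events: lines of the form "data: {...}".
    with requests.post(f"{BASE}/stream", json={
        "question": "What is DocsGPT?",
        "history": json.dumps([]),  # stream() json.loads() the history string
        "conversation_id": None,
        "active_docs": "default",
    }, stream=True) as r:
        for line in r.iter_lines():
            if line.startswith(b"data: "):
                print(json.loads(line[len(b"data: "):]))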
api/internal/__init__.py ADDED
File without changes
api/internal/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes).
api/internal/__pycache__/routes.cpython-310.pyc ADDED
Binary file (2.07 kB).
api/internal/routes.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ import datetime
+ from flask import Blueprint, request, send_from_directory
+ from pymongo import MongoClient
+ from werkzeug.utils import secure_filename
+
+ from application.core.settings import settings
+ mongo = MongoClient(settings.MONGO_URI)
+ db = mongo["docsgpt"]
+ conversations_collection = db["conversations"]
+ vectors_collection = db["vectors"]
+
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+ internal = Blueprint('internal', __name__)
+
+ @internal.route("/api/download", methods=["get"])
+ def download_file():
+     user = secure_filename(request.args.get("user"))
+     job_name = secure_filename(request.args.get("name"))
+     filename = secure_filename(request.args.get("file"))
+     save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
+     return send_from_directory(save_dir, filename, as_attachment=True)
+
+
+ @internal.route("/api/upload_index", methods=["POST"])
+ def upload_index_files():
+     """Upload two files (index.faiss, index.pkl) to the user's folder."""
+     if "user" not in request.form:
+         return {"status": "no user"}
+     user = secure_filename(request.form["user"])
+     if "name" not in request.form:
+         return {"status": "no name"}
+     job_name = secure_filename(request.form["name"])
+     save_dir = os.path.join(current_dir, "indexes", user, job_name)
+     if settings.VECTOR_STORE == "faiss":
+         if "file_faiss" not in request.files:
+             print("No file part")
+             return {"status": "no file"}
+         file_faiss = request.files["file_faiss"]
+         if file_faiss.filename == "":
+             return {"status": "no file name"}
+         if "file_pkl" not in request.files:
+             print("No file part")
+             return {"status": "no file"}
+         file_pkl = request.files["file_pkl"]
+         if file_pkl.filename == "":
+             return {"status": "no file name"}
+         # saves index files
+
+         if not os.path.exists(save_dir):
+             os.makedirs(save_dir)
+         file_faiss.save(os.path.join(save_dir, "index.faiss"))
+         file_pkl.save(os.path.join(save_dir, "index.pkl"))
+     # create entry in vectors_collection
+     vectors_collection.insert_one(
+         {
+             "user": user,
+             "name": job_name,
+             "language": job_name,
+             "location": save_dir,
+             "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
+             "model": settings.EMBEDDINGS_NAME,
+             "type": "local",
+         }
+     )
+     return {"status": "ok"}
api/user/__init__.py ADDED
File without changes
api/user/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes).
api/user/__pycache__/routes.cpython-310.pyc ADDED
Binary file (8.12 kB).
api/user/__pycache__/tasks.cpython-310.pyc ADDED
Binary file (466 Bytes).
api/user/routes.py ADDED
@@ -0,0 +1,321 @@
+ import os
+ from flask import Blueprint, request, jsonify
+ import requests
+ from pymongo import MongoClient
+ from bson.objectid import ObjectId
+ from werkzeug.utils import secure_filename
+
+ from application.api.user.tasks import ingest
+
+ from application.core.settings import settings
+ from application.vectorstore.vector_creator import VectorCreator
+
+ mongo = MongoClient(settings.MONGO_URI)
+ db = mongo["docsgpt"]
+ conversations_collection = db["conversations"]
+ vectors_collection = db["vectors"]
+ prompts_collection = db["prompts"]
+ feedback_collection = db["feedback"]
+ user = Blueprint('user', __name__)
+
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+ @user.route("/api/delete_conversation", methods=["POST"])
+ def delete_conversation():
+     # deletes a conversation from the database
+     conversation_id = request.args.get("id")
+     # write to mongodb
+     conversations_collection.delete_one(
+         {
+             "_id": ObjectId(conversation_id),
+         }
+     )
+
+     return {"status": "ok"}
+
+ @user.route("/api/get_conversations", methods=["get"])
+ def get_conversations():
+     # provides a list of conversations
+     conversations = conversations_collection.find().sort("date", -1)
+     list_conversations = []
+     for conversation in conversations:
+         list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
+
+     #list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
+
+     return jsonify(list_conversations)
+
+
+ @user.route("/api/get_single_conversation", methods=["get"])
+ def get_single_conversation():
+     # provides data for a conversation
+     conversation_id = request.args.get("id")
+     conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
+     return jsonify(conversation['queries'])
+
+ @user.route("/api/update_conversation_name", methods=["POST"])
+ def update_conversation_name():
+     # update data for a conversation
+     data = request.get_json()
+     id = data["id"]
+     name = data["name"]
+     conversations_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name}})
+     return {"status": "ok"}
+
+
+ @user.route("/api/feedback", methods=["POST"])
+ def api_feedback():
+     data = request.get_json()
+     question = data["question"]
+     answer = data["answer"]
+     feedback = data["feedback"]
+
+     feedback_collection.insert_one(
+         {
+             "question": question,
+             "answer": answer,
+             "feedback": feedback,
+         }
+     )
+     return {"status": "ok"}
+
+ @user.route("/api/delete_by_ids", methods=["get"])
+ def delete_by_ids():
+     """Delete by ID. These are the IDs in the vectorstore"""
+
+     ids = request.args.get("path")
+     if not ids:
+         return {"status": "error"}
+
+     if settings.VECTOR_STORE == "faiss":
+         result = vectors_collection.delete_index(ids=ids)
+         if result:
+             return {"status": "ok"}
+     return {"status": "error"}
+
+ @user.route("/api/delete_old", methods=["get"])
+ def delete_old():
+     """Delete old indexes."""
+     import shutil
+
+     path = request.args.get("path")
+     dirs = path.split("/")
+     dirs_clean = []
+     for i in range(0, len(dirs)):
+         dirs_clean.append(secure_filename(dirs[i]))
+     # check that path starts with indexes or vectors
+
+     if dirs_clean[0] not in ["indexes", "vectors"]:
+         return {"status": "error"}
+     path_clean = "/".join(dirs_clean)
+     vectors_collection.delete_one({"name": dirs_clean[-1], 'user': dirs_clean[-2]})
+     if settings.VECTOR_STORE == "faiss":
+         try:
+             shutil.rmtree(os.path.join(current_dir, path_clean))
+         except FileNotFoundError:
+             pass
+     else:
+         vetorstore = VectorCreator.create_vectorstore(
+             settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
+         )
+         vetorstore.delete_index()
+
+     return {"status": "ok"}
+
+ @user.route("/api/upload", methods=["POST"])
+ def upload_file():
+     """Upload a file to get vectorized and indexed."""
+     if "user" not in request.form:
+         return {"status": "no user"}
+     user = secure_filename(request.form["user"])
+     if "name" not in request.form:
+         return {"status": "no name"}
+     job_name = secure_filename(request.form["name"])
+     # check if the post request has the file part
+     if "file" not in request.files:
+         print("No file part")
+         return {"status": "no file"}
+     file = request.files["file"]
+     if file.filename == "":
+         return {"status": "no file name"}
+
+     if file:
+         filename = secure_filename(file.filename)
+         # save dir
+         save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
+         # create dir if not exists
+         if not os.path.exists(save_dir):
+             os.makedirs(save_dir)
+
+         file.save(os.path.join(save_dir, filename))
+         task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt", ".docx",
+                                                      ".csv", ".epub", ".html", ".mdx"],
+                             job_name, filename, user)
+         # task id
+         task_id = task.id
+         return {"status": "ok", "task_id": task_id}
+     else:
+         return {"status": "error"}
+
+ @user.route("/api/task_status", methods=["GET"])
+ def task_status():
+     """Get celery job status."""
+     task_id = request.args.get("task_id")
+     from application.celery import celery
+     task = celery.AsyncResult(task_id)
+     task_meta = task.info
+     return {"status": task.status, "result": task_meta}
+
+
+ @user.route("/api/combine", methods=["GET"])
+ def combined_json():
+     user = "local"
+     """Provide json file with combined available indexes."""
+     # get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
+
+     data = [
+         {
+             "name": "default",
+             "language": "default",
+             "version": "",
+             "description": "default",
+             "fullName": "default",
+             "date": "default",
+             "docLink": "default",
+             "model": settings.EMBEDDINGS_NAME,
+             "location": "remote",
+         }
+     ]
+     # structure: name, language, version, description, fullName, date, docLink
+     # append data from vectors_collection
+     for index in vectors_collection.find({"user": user}):
+         data.append(
+             {
+                 "name": index["name"],
+                 "language": index["language"],
+                 "version": "",
+                 "description": index["name"],
+                 "fullName": index["name"],
+                 "date": index["date"],
+                 "docLink": index["location"],
+                 "model": settings.EMBEDDINGS_NAME,
+                 "location": "local",
+             }
+         )
+     if settings.VECTOR_STORE == "faiss":
+         data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
+         for index in data_remote:
+             index["location"] = "remote"
+             data.append(index)
+
+     return jsonify(data)
+
+
+ @user.route("/api/docs_check", methods=["POST"])
+ def check_docs():
+     # check if docs exist in a vectorstore folder
+     data = request.get_json()
+     # split docs on / and take first part
+     if data["docs"].split("/")[0] == "local":
+         return {"status": "exists"}
+     vectorstore = "vectors/" + data["docs"]
+     base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
+     if os.path.exists(vectorstore) or data["docs"] == "default":
+         return {"status": "exists"}
+     else:
+         r = requests.get(base_path + vectorstore + "index.faiss")
+
+         if r.status_code != 200:
+             return {"status": "null"}
+         else:
+             if not os.path.exists(vectorstore):
+                 os.makedirs(vectorstore)
+             with open(vectorstore + "index.faiss", "wb") as f:
+                 f.write(r.content)
+
+             # download the store
+             r = requests.get(base_path + vectorstore + "index.pkl")
+             with open(vectorstore + "index.pkl", "wb") as f:
+                 f.write(r.content)
+
+         return {"status": "loaded"}
+
+ @user.route("/api/create_prompt", methods=["POST"])
+ def create_prompt():
+     data = request.get_json()
+     content = data["content"]
+     name = data["name"]
+     if name == "":
+         return {"status": "error"}
+     user = "local"
+     resp = prompts_collection.insert_one(
+         {
+             "name": name,
+             "content": content,
+             "user": user,
+         }
+     )
+     new_id = str(resp.inserted_id)
+     return {"id": new_id}
+
+ @user.route("/api/get_prompts", methods=["GET"])
+ def get_prompts():
+     user = "local"
+     prompts = prompts_collection.find({"user": user})
+     list_prompts = []
+     list_prompts.append({"id": "default", "name": "default", "type": "public"})
+     list_prompts.append({"id": "creative", "name": "creative", "type": "public"})
+     list_prompts.append({"id": "strict", "name": "strict", "type": "public"})
+     for prompt in prompts:
+         list_prompts.append({"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"})
+
+     return jsonify(list_prompts)
+
+ @user.route("/api/get_single_prompt", methods=["GET"])
+ def get_single_prompt():
+     prompt_id = request.args.get("id")
+     if prompt_id == 'default':
+         with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
+             chat_combine_template = f.read()
+         return jsonify({"content": chat_combine_template})
+     elif prompt_id == 'creative':
+         with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
+             chat_reduce_creative = f.read()
+         return jsonify({"content": chat_reduce_creative})
+     elif prompt_id == 'strict':
+         with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
+             chat_reduce_strict = f.read()
+         return jsonify({"content": chat_reduce_strict})
+
+     prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})
+     return jsonify({"content": prompt["content"]})
+
+ @user.route("/api/delete_prompt", methods=["POST"])
+ def delete_prompt():
+     data = request.get_json()
+     id = data["id"]
+     prompts_collection.delete_one(
+         {
+             "_id": ObjectId(id),
+         }
+     )
+     return {"status": "ok"}
+
+ @user.route("/api/update_prompt", methods=["POST"])
+ def update_prompt_name():
+     data = request.get_json()
+     id = data["id"]
+     name = data["name"]
+     content = data["content"]
+     # check if name is null
+     if name == "":
+         return {"status": "error"}
+     prompts_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}})
+     return {"status": "ok"}
+
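Note: a sketch of the upload-then-poll flow exposed by /api/upload and /api/task_status (not part of the commit; it assumes a running Redis broker and a celery worker so that ingest.delay() is actually consumed):

    import time
    import requests

    BASE = "http://localhost:7091"

    with open("patil2016.pdf", "rb") as f:
        r = requests.post(f"{BASE}/api/upload",
                          data={"user": "local", "name": "patil2016.pdf"},
                          files={"file": f})
    task_id = r.json()["task_id"]

    # Poll the celery task until the worker reports a final state.
    while True:
        status = requests.get(f"{BASE}/api/task_status",
                              params={"task_id": task_id}).json()
        if status["status"] in ("SUCCESS", "FAILURE"):
            break
        time.sleep(2)
    print(status)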
api/user/tasks.py ADDED
@@ -0,0 +1,7 @@
+ from application.worker import ingest_worker
+ from application.celery import celery
+
+ @celery.task(bind=True)
+ def ingest(self, directory, formats, name_job, filename, user):
+     resp = ingest_worker(self, directory, formats, name_job, filename, user)
+     return resp
app.py ADDED
@@ -0,0 +1,44 @@
+ import platform
+ import dotenv
+ from application.celery import celery
+ from flask import Flask, request, redirect
+ from application.core.settings import settings
+ from application.api.user.routes import user
+ from application.api.answer.routes import answer
+ from application.api.internal.routes import internal
+
+ if platform.system() == "Windows":
+     import pathlib
+     pathlib.PosixPath = pathlib.WindowsPath
+
+ dotenv.load_dotenv()
+
+ app = Flask(__name__)
+ app.register_blueprint(user)
+ app.register_blueprint(answer)
+ app.register_blueprint(internal)
+ app.config.update(
+     UPLOAD_FOLDER="inputs",
+     CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
+     CELERY_RESULT_BACKEND=settings.CELERY_RESULT_BACKEND,
+     MONGO_URI=settings.MONGO_URI
+ )
+ celery.config_from_object("application.celeryconfig")
+
+ @app.route("/")
+ def home():
+     if request.remote_addr in ('0.0.0.0', '127.0.0.1', 'localhost', '172.18.0.1'):
+         return redirect('http://localhost:5173')
+     else:
+         return 'Welcome to DocsGPT Backend!'
+
+ @app.after_request
+ def after_request(response):
+     response.headers.add("Access-Control-Allow-Origin", "*")
+     response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization")
+     response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
+     return response
+
+ if __name__ == "__main__":
+     app.run(debug=True, port=7091)
celery.py ADDED
@@ -0,0 +1,9 @@
+ from celery import Celery
+ from application.core.settings import settings
+
+ def make_celery(app_name=__name__):
+     celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
+     celery.conf.update(settings)
+     return celery
+
+ celery = make_celery()
celeryconfig.py ADDED
@@ -0,0 +1,8 @@
+ import os
+
+ broker_url = os.getenv("CELERY_BROKER_URL")
+ result_backend = os.getenv("CELERY_RESULT_BACKEND")
+
+ task_serializer = 'json'
+ result_serializer = 'json'
+ accept_content = ['json']
core/__init__.py ADDED
File without changes
core/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (155 Bytes).
core/__pycache__/settings.cpython-310.pyc ADDED
Binary file (1.92 kB).
core/settings.py ADDED
@@ -0,0 +1,44 @@
+ from pathlib import Path
+ from typing import Optional
+ import os
+
+ from pydantic_settings import BaseSettings
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+ class Settings(BaseSettings):
+     LLM_NAME: str = "docsgpt"
+     EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
+     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
+     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
+     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
+     MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
+     TOKENS_MAX_HISTORY: int = 150
+     UPLOAD_FOLDER: str = "inputs"
+     VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch"
+
+     API_URL: str = "http://localhost:7091"  # backend url for celery worker
+
+     API_KEY: Optional[str] = None  # LLM api key
+     EMBEDDINGS_KEY: Optional[str] = None  # api key for embeddings (if using openai, just copy API_KEY)
+     OPENAI_API_BASE: Optional[str] = None  # azure openai api base url
+     OPENAI_API_VERSION: Optional[str] = None  # azure openai api version
+     AZURE_DEPLOYMENT_NAME: Optional[str] = None  # azure deployment name for answering
+     AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None  # azure deployment name for embeddings
+
+     # elasticsearch
+     ELASTIC_CLOUD_ID: Optional[str] = None  # cloud id for elasticsearch
+     ELASTIC_USERNAME: Optional[str] = None  # username for elasticsearch
+     ELASTIC_PASSWORD: Optional[str] = None  # password for elasticsearch
+     ELASTIC_URL: Optional[str] = None  # url for elasticsearch
+     ELASTIC_INDEX: Optional[str] = "docsgpt"  # index name for elasticsearch
+
+     # SageMaker config
+     SAGEMAKER_ENDPOINT: Optional[str] = None  # SageMaker endpoint name
+     SAGEMAKER_REGION: Optional[str] = None  # SageMaker region name
+     SAGEMAKER_ACCESS_KEY: Optional[str] = None  # SageMaker access key
+     SAGEMAKER_SECRET_KEY: Optional[str] = None  # SageMaker secret key
+
+
+ path = Path(__file__).parent.parent.absolute()
+ settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
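Note: Settings is a plain pydantic BaseSettings class, so every field above can also be overridden through the .env file or environment variables; a short sketch (not part of the commit, values are illustrative only):

    import os
    from application.core.settings import Settings

    # Environment variables take precedence over the defaults declared above.
    os.environ["LLM_NAME"] = "openai"
    os.environ["API_KEY"] = "sk-..."  # placeholder, not a real key
    os.environ["VECTOR_STORE"] = "faiss"

    settings = Settings()
    print(settings.LLM_NAME, settings.VECTOR_STORE)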
error.py ADDED
@@ -0,0 +1,15 @@
+ from flask import jsonify
+ from werkzeug.http import HTTP_STATUS_CODES
+
+
+ def response_error(code_status, message=None):
+     payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
+     if message:
+         payload['message'] = message
+     response = jsonify(payload)
+     response.status_code = code_status
+     return response
+
+
+ def bad_request(status_code=400, message=''):
+     return response_error(code_status=status_code, message=message)
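Note: a small sketch (not part of the commit) of how bad_request() is meant to be used from a route:

    from flask import Flask
    from application.error import bad_request

    app = Flask(__name__)

    @app.route("/api/fail")
    def fail():
        # Responds with status 500 and a JSON body such as
        # {"error": "Internal Server Error", "message": "something broke"}
        return bad_request(500, "something broke")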
index.faiss ADDED
Binary file (9.26 kB).
index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1653826159295b5a262df5228ec9678a919a9fcc3ff94248eeaa55f434c071ef
+ size 7866
indexes/local/patil2016.pdf/index.faiss ADDED
Binary file (15.4 kB).
indexes/local/patil2016.pdf/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccc1aa0edd32b66234b113edba42b67f5fc498851e584863124f44abf3920273
+ size 28255
inputs/local/patil2016.pdf/patil2016.pdf ADDED
Binary file (280 kB).
llm/__init__.py ADDED
File without changes
llm/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes).
llm/__pycache__/anthropic.cpython-310.pyc ADDED
Binary file (1.65 kB).
llm/__pycache__/base.cpython-310.pyc ADDED
Binary file (734 Bytes).
llm/__pycache__/docsgpt_provider.cpython-310.pyc ADDED
Binary file (1.59 kB).
llm/__pycache__/huggingface.cpython-310.pyc ADDED
Binary file (1.81 kB).
llm/__pycache__/llama_cpp.cpython-310.pyc ADDED
Binary file (1.58 kB).
llm/__pycache__/llm_creator.cpython-310.pyc ADDED
Binary file (1.15 kB).
llm/__pycache__/openai.cpython-310.pyc ADDED
Binary file (2.16 kB).
llm/__pycache__/sagemaker.cpython-310.pyc ADDED
Binary file (4.33 kB).
llm/anthropic.py ADDED
@@ -0,0 +1,40 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+
+ class AnthropicLLM(BaseLLM):
+
+     def __init__(self, api_key=None):
+         from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+         self.api_key = api_key or settings.ANTHROPIC_API_KEY  # If not provided, use a default from settings
+         self.anthropic = Anthropic(api_key=self.api_key)
+         self.HUMAN_PROMPT = HUMAN_PROMPT
+         self.AI_PROMPT = AI_PROMPT
+
+     def gen(self, model, messages, engine=None, max_tokens=300, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Context \n {context} \n ### Question \n {user_question}"
+         if stream:
+             return self.gen_stream(model, prompt, max_tokens, **kwargs)
+
+         completion = self.anthropic.completions.create(
+             model=model,
+             max_tokens_to_sample=max_tokens,
+             stream=stream,
+             prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
+         )
+         return completion.completion
+
+     def gen_stream(self, model, messages, engine=None, max_tokens=300, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Context \n {context} \n ### Question \n {user_question}"
+         stream_response = self.anthropic.completions.create(
+             model=model,
+             prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
+             max_tokens_to_sample=max_tokens,
+             stream=True,
+         )
+
+         for completion in stream_response:
+             yield completion.completion
llm/base.py ADDED
@@ -0,0 +1,14 @@
+ from abc import ABC, abstractmethod
+
+
+ class BaseLLM(ABC):
+     def __init__(self):
+         pass
+
+     @abstractmethod
+     def gen(self, *args, **kwargs):
+         pass
+
+     @abstractmethod
+     def gen_stream(self, *args, **kwargs):
+         pass
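Note: any new provider only has to implement these two methods; a toy subclass (not part of the commit) that echoes the last user message back, useful for wiring tests:

    from application.llm.base import BaseLLM

    class EchoLLM(BaseLLM):
        def gen(self, model, engine, messages, stream=False, **kwargs):
            # Return the last user message unchanged.
            return messages[-1]["content"]

        def gen_stream(self, model, engine, messages, stream=True, **kwargs):
            # Yield the last user message word by word, mimicking token streaming.
            for token in messages[-1]["content"].split():
                yield token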
llm/docsgpt_provider.py ADDED
@@ -0,0 +1,49 @@
+ from application.llm.base import BaseLLM
+ import json
+ import requests
+
+ class DocsGPTAPILLM(BaseLLM):
+
+     def __init__(self, *args, **kwargs):
+         self.endpoint = "https://llm.docsgpt.co.uk"
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         response = requests.post(
+             f"{self.endpoint}/answer",
+             json={
+                 "prompt": prompt,
+                 "max_new_tokens": 30
+             }
+         )
+         response_clean = response.json()['a'].split("###")[0]
+
+         return response_clean
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         # send prompt to endpoint /stream
+         response = requests.post(
+             f"{self.endpoint}/stream",
+             json={
+                 "prompt": prompt,
+                 "max_new_tokens": 256
+             },
+             stream=True
+         )
+
+         for line in response.iter_lines():
+             if line:
+                 #data = json.loads(line)
+                 data_str = line.decode('utf-8')
+                 if data_str.startswith("data: "):
+                     data = json.loads(data_str[6:])
+                     yield data['a']
llm/huggingface.py ADDED
@@ -0,0 +1,44 @@
+ from application.llm.base import BaseLLM
+
+ class HuggingFaceLLM(BaseLLM):
+
+     def __init__(self, api_key, llm_name='Arc53/DocsGPT-7B', q=False):
+         global hf
+
+         from langchain.llms import HuggingFacePipeline
+         if q:
+             import torch
+             from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
+             tokenizer = AutoTokenizer.from_pretrained(llm_name)
+             bnb_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_use_double_quant=True,
+                 bnb_4bit_quant_type="nf4",
+                 bnb_4bit_compute_dtype=torch.bfloat16
+             )
+             model = AutoModelForCausalLM.from_pretrained(llm_name, quantization_config=bnb_config)
+         else:
+             from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+             tokenizer = AutoTokenizer.from_pretrained(llm_name)
+             model = AutoModelForCausalLM.from_pretrained(llm_name)
+
+         pipe = pipeline(
+             "text-generation", model=model,
+             tokenizer=tokenizer, max_new_tokens=2000,
+             device_map="auto", eos_token_id=tokenizer.eos_token_id
+         )
+         hf = HuggingFacePipeline(pipeline=pipe)
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         result = hf(prompt)
+
+         return result.content
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+
+         raise NotImplementedError("HuggingFaceLLM Streaming is not implemented yet.")
llm/llama_cpp.py ADDED
@@ -0,0 +1,39 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+
+ class LlamaCpp(BaseLLM):
+
+     def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
+         global llama
+         try:
+             from llama_cpp import Llama
+         except ImportError:
+             raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
+
+         llama = Llama(model_path=llm_name, n_ctx=2048)
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         result = llama(prompt, max_tokens=150, echo=False)
+
+         # import sys
+         # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
+
+         return result['choices'][0]['text'].split('### Answer \n')[-1]
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         result = llama(prompt, max_tokens=150, echo=False, stream=stream)
+
+         # import sys
+         # print(list(result), file=sys.stderr)
+
+         for item in result:
+             for choice in item['choices']:
+                 yield choice['text']
llm/llm_creator.py ADDED
@@ -0,0 +1,26 @@
+ from application.llm.openai import OpenAILLM, AzureOpenAILLM
+ from application.llm.sagemaker import SagemakerAPILLM
+ from application.llm.huggingface import HuggingFaceLLM
+ from application.llm.llama_cpp import LlamaCpp
+ from application.llm.anthropic import AnthropicLLM
+ from application.llm.docsgpt_provider import DocsGPTAPILLM
+
+
+ class LLMCreator:
+     llms = {
+         'openai': OpenAILLM,
+         'azure_openai': AzureOpenAILLM,
+         'sagemaker': SagemakerAPILLM,
+         'huggingface': HuggingFaceLLM,
+         'llama.cpp': LlamaCpp,
+         'anthropic': AnthropicLLM,
+         'docsgpt': DocsGPTAPILLM
+     }
+
+     @classmethod
+     def create_llm(cls, type, *args, **kwargs):
+         llm_class = cls.llms.get(type.lower())
+         if not llm_class:
+             raise ValueError(f"No LLM class found for type {type}")
+         return llm_class(*args, **kwargs)
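Note: a usage sketch for the factory (not part of the commit; the 'docsgpt' provider needs network access to llm.docsgpt.co.uk):

    from application.llm.llm_creator import LLMCreator

    # The lookup key mirrors settings.LLM_NAME; unknown names raise ValueError.
    llm = LLMCreator.create_llm("docsgpt")
    answer = llm.gen(model=None, engine=None,
                     messages=[{"role": "system", "content": "Some context."},
                               {"role": "user", "content": "What is DocsGPT?"}])
    print(answer)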
llm/openai.py ADDED
@@ -0,0 +1,60 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+
+ class OpenAILLM(BaseLLM):
+
+     def __init__(self, api_key):
+         global openai
+         from openai import OpenAI
+
+         self.client = OpenAI(
+             api_key=api_key,
+         )
+         self.api_key = api_key
+
+     def _get_openai(self):
+         # Import openai when needed
+         import openai
+
+         return openai
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         response = self.client.chat.completions.create(model=model,
+                                                         messages=messages,
+                                                         stream=stream,
+                                                         **kwargs)
+
+         return response.choices[0].message.content
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         response = self.client.chat.completions.create(model=model,
+                                                         messages=messages,
+                                                         stream=stream,
+                                                         **kwargs)
+
+         for line in response:
+             # import sys
+             # print(line.choices[0].delta.content, file=sys.stderr)
+             if line.choices[0].delta.content is not None:
+                 yield line.choices[0].delta.content
+
+
+ class AzureOpenAILLM(OpenAILLM):
+
+     def __init__(self, openai_api_key, openai_api_base, openai_api_version, deployment_name):
+         super().__init__(openai_api_key)
+         self.api_base = settings.OPENAI_API_BASE,
+         self.api_version = settings.OPENAI_API_VERSION,
+         self.deployment_name = settings.AZURE_DEPLOYMENT_NAME,
+         from openai import AzureOpenAI
+         self.client = AzureOpenAI(
+             api_key=openai_api_key,
+             api_version=settings.OPENAI_API_VERSION,
+             api_base=settings.OPENAI_API_BASE,
+             deployment_name=settings.AZURE_DEPLOYMENT_NAME,
+         )
+
+     def _get_openai(self):
+         openai = super()._get_openai()
+
+         return openai
llm/sagemaker.py ADDED
@@ -0,0 +1,139 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+ import json
+ import io
+
+
+ class LineIterator:
+     """
+     A helper class for parsing the byte stream input.
+
+     The output of the model will be in the following format:
+     ```
+     b'{"outputs": [" a"]}\n'
+     b'{"outputs": [" challenging"]}\n'
+     b'{"outputs": [" problem"]}\n'
+     ...
+     ```
+
+     While usually each PayloadPart event from the event stream will contain a byte array
+     with a full json, this is not guaranteed and some of the json objects may be split across
+     PayloadPart events. For example:
+     ```
+     {'PayloadPart': {'Bytes': b'{"outputs": '}}
+     {'PayloadPart': {'Bytes': b'[" problem"]}\n'}}
+     ```
+
+     This class accounts for this by concatenating bytes written via the 'write' function
+     and then exposing a method which will return lines (ending with a '\n' character) within
+     the buffer via the 'scan_lines' function. It maintains the position of the last read
+     position to ensure that previous bytes are not exposed again.
+     """
+
+     def __init__(self, stream):
+         self.byte_iterator = iter(stream)
+         self.buffer = io.BytesIO()
+         self.read_pos = 0
+
+     def __iter__(self):
+         return self
+
+     def __next__(self):
+         while True:
+             self.buffer.seek(self.read_pos)
+             line = self.buffer.readline()
+             if line and line[-1] == ord('\n'):
+                 self.read_pos += len(line)
+                 return line[:-1]
+             try:
+                 chunk = next(self.byte_iterator)
+             except StopIteration:
+                 if self.read_pos < self.buffer.getbuffer().nbytes:
+                     continue
+                 raise
+             if 'PayloadPart' not in chunk:
+                 print('Unknown event type:' + chunk)
+                 continue
+             self.buffer.seek(0, io.SEEK_END)
+             self.buffer.write(chunk['PayloadPart']['Bytes'])
+
+ class SagemakerAPILLM(BaseLLM):
+
+     def __init__(self, *args, **kwargs):
+         import boto3
+         runtime = boto3.client(
+             'runtime.sagemaker',
+             aws_access_key_id='xxx',
+             aws_secret_access_key='xxx',
+             region_name='us-west-2'
+         )
+
+         self.endpoint = settings.SAGEMAKER_ENDPOINT
+         self.runtime = runtime
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         # Construct payload for endpoint
+         payload = {
+             "inputs": prompt,
+             "stream": False,
+             "parameters": {
+                 "do_sample": True,
+                 "temperature": 0.1,
+                 "max_new_tokens": 30,
+                 "repetition_penalty": 1.03,
+                 "stop": ["</s>", "###"]
+             }
+         }
+         body_bytes = json.dumps(payload).encode('utf-8')
+
+         # Invoke the endpoint
+         response = self.runtime.invoke_endpoint(EndpointName=self.endpoint,
+                                                 ContentType='application/json',
+                                                 Body=body_bytes)
+         result = json.loads(response['Body'].read().decode())
+         import sys
+         print(result[0]['generated_text'], file=sys.stderr)
+         return result[0]['generated_text'][len(prompt):]
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         # Construct payload for endpoint
+         payload = {
+             "inputs": prompt,
+             "stream": True,
+             "parameters": {
+                 "do_sample": True,
+                 "temperature": 0.1,
+                 "max_new_tokens": 512,
+                 "repetition_penalty": 1.03,
+                 "stop": ["</s>", "###"]
+             }
+         }
+         body_bytes = json.dumps(payload).encode('utf-8')
+
+         # Invoke the endpoint
+         response = self.runtime.invoke_endpoint_with_response_stream(EndpointName=self.endpoint,
+                                                                      ContentType='application/json',
+                                                                      Body=body_bytes)
+         #result = json.loads(response['Body'].read().decode())
+         event_stream = response['Body']
+         start_json = b'{'
+         for line in LineIterator(event_stream):
+             if line != b'' and start_json in line:
+                 #print(line)
+                 data = json.loads(line[line.find(start_json):].decode('utf-8'))
+                 if data['token']['text'] not in ["</s>", "###"]:
+                     print(data['token']['text'], end='')
+                     yield data['token']['text']
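Note: a self-contained sketch (not part of the commit) showing how LineIterator reassembles a JSON line that SageMaker split across two PayloadPart events:

    import json
    from application.llm.sagemaker import LineIterator

    # Fake event stream: one JSON line deliberately split across two events.
    fake_stream = [
        {"PayloadPart": {"Bytes": b'{"token": {"te'}},
        {"PayloadPart": {"Bytes": b'xt": " hello"}}\n'}},
    ]
    for line in LineIterator(fake_stream):
        data = json.loads(line.decode("utf-8"))
        print(data["token"]["text"])  # -> " hello"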
parser/__init__.py ADDED
@@ -0,0 +1 @@
+