VTechAI committed
Commit
8a41f4d
1 Parent(s): 14d00fc
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. Dockerfile +29 -0
  2. __init__.py +0 -0
  3. api/__init__.py +0 -0
  4. api/__pycache__/__init__.cpython-310.pyc +0 -0
  5. api/answer/__init__.py +0 -0
  6. api/answer/__pycache__/__init__.cpython-310.pyc +0 -0
  7. api/answer/__pycache__/routes.cpython-310.pyc +0 -0
  8. api/answer/routes.py +371 -0
  9. api/internal/__init__.py +0 -0
  10. api/internal/__pycache__/__init__.cpython-310.pyc +0 -0
  11. api/internal/__pycache__/routes.cpython-310.pyc +0 -0
  12. api/internal/routes.py +69 -0
  13. api/user/__init__.py +0 -0
  14. api/user/__pycache__/__init__.cpython-310.pyc +0 -0
  15. api/user/__pycache__/routes.cpython-310.pyc +0 -0
  16. api/user/__pycache__/tasks.cpython-310.pyc +0 -0
  17. api/user/routes.py +321 -0
  18. api/user/tasks.py +7 -0
  19. app.py +44 -0
  20. celery.py +9 -0
  21. celeryconfig.py +8 -0
  22. core/__init__.py +0 -0
  23. core/__pycache__/__init__.cpython-310.pyc +0 -0
  24. core/__pycache__/settings.cpython-310.pyc +0 -0
  25. core/settings.py +44 -0
  26. error.py +15 -0
  27. index.faiss +0 -0
  28. index.pkl +3 -0
  29. indexes/local/patil2016.pdf/index.faiss +0 -0
  30. indexes/local/patil2016.pdf/index.pkl +3 -0
  31. inputs/local/patil2016.pdf/patil2016.pdf +0 -0
  32. llm/__init__.py +0 -0
  33. llm/__pycache__/__init__.cpython-310.pyc +0 -0
  34. llm/__pycache__/anthropic.cpython-310.pyc +0 -0
  35. llm/__pycache__/base.cpython-310.pyc +0 -0
  36. llm/__pycache__/docsgpt_provider.cpython-310.pyc +0 -0
  37. llm/__pycache__/huggingface.cpython-310.pyc +0 -0
  38. llm/__pycache__/llama_cpp.cpython-310.pyc +0 -0
  39. llm/__pycache__/llm_creator.cpython-310.pyc +0 -0
  40. llm/__pycache__/openai.cpython-310.pyc +0 -0
  41. llm/__pycache__/sagemaker.cpython-310.pyc +0 -0
  42. llm/anthropic.py +40 -0
  43. llm/base.py +14 -0
  44. llm/docsgpt_provider.py +49 -0
  45. llm/huggingface.py +44 -0
  46. llm/llama_cpp.py +39 -0
  47. llm/llm_creator.py +26 -0
  48. llm/openai.py +60 -0
  49. llm/sagemaker.py +139 -0
  50. parser/__init__.py +1 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
+ FROM python:3.11-slim-bullseye as builder
+
+ # Tiktoken requires Rust toolchain, so build it in a separate stage
+ RUN apt-get update && apt-get install -y gcc curl
+ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
+ ENV PATH="/root/.cargo/bin:${PATH}"
+ RUN pip install --upgrade pip && pip install tiktoken==0.5.2
+ COPY requirements.txt .
+ RUN pip install -r requirements.txt
+ RUN apt-get install -y wget unzip
+ RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
+ RUN unzip mpnet-base-v2.zip -d model
+ RUN rm mpnet-base-v2.zip
+
+ FROM python:3.11-slim-bullseye
+
+ # Copy pre-built packages and binaries from builder stage
+ COPY --from=builder /usr/local/ /usr/local/
+
+ WORKDIR /app
+ COPY --from=builder /model /app/model
+
+ COPY . /app/application
+ ENV FLASK_APP=app.py
+ ENV FLASK_DEBUG=true
+
+ EXPOSE 7091
+
+ CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
__init__.py ADDED
File without changes
api/__init__.py ADDED
File without changes
api/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes).
api/answer/__init__.py ADDED
File without changes
api/answer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (161 Bytes).
api/answer/__pycache__/routes.cpython-310.pyc ADDED
Binary file (8.1 kB).
api/answer/routes.py ADDED
@@ -0,0 +1,371 @@
+ import asyncio
+ import os
+ from flask import Blueprint, request, Response
+ import json
+ import datetime
+ import logging
+ import traceback
+
+ from pymongo import MongoClient
+ from bson.objectid import ObjectId
+ from transformers import GPT2TokenizerFast
+
+ from application.core.settings import settings
+ from application.vectorstore.vector_creator import VectorCreator
+ from application.llm.llm_creator import LLMCreator
+ from application.error import bad_request
+
+ logger = logging.getLogger(__name__)
+
+ mongo = MongoClient(settings.MONGO_URI)
+ db = mongo["docsgpt"]
+ conversations_collection = db["conversations"]
+ vectors_collection = db["vectors"]
+ prompts_collection = db["prompts"]
+ answer = Blueprint('answer', __name__)
+
+ if settings.LLM_NAME == "gpt4":
+     gpt_model = 'gpt-4'
+ elif settings.LLM_NAME == "anthropic":
+     gpt_model = 'claude-2'
+ else:
+     gpt_model = 'gpt-3.5-turbo'
+
+ # load the prompts
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+ with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
+     chat_combine_template = f.read()
+
+ with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
+     chat_reduce_template = f.read()
+
+ with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
+     chat_combine_creative = f.read()
+
+ with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
+     chat_combine_strict = f.read()
+
+ api_key_set = settings.API_KEY is not None
+ embeddings_key_set = settings.EMBEDDINGS_KEY is not None
+
+
+ async def async_generate(chain, question, chat_history):
+     result = await chain.arun({"question": question, "chat_history": chat_history})
+     return result
+
+
+ def count_tokens(string):
+     tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
+     return len(tokenizer(string)['input_ids'])
+
+
+ def run_async_chain(chain, question, chat_history):
+     loop = asyncio.new_event_loop()
+     asyncio.set_event_loop(loop)
+     result = {}
+     try:
+         answer = loop.run_until_complete(async_generate(chain, question, chat_history))
+     finally:
+         loop.close()
+     result["answer"] = answer
+     return result
+
+
+ def get_vectorstore(data):
+     if "active_docs" in data:
+         if data["active_docs"].split("/")[0] == "default":
+             vectorstore = ""
+         elif data["active_docs"].split("/")[0] == "local":
+             vectorstore = "indexes/" + data["active_docs"]
+         else:
+             vectorstore = "vectors/" + data["active_docs"]
+         if data["active_docs"] == "default":
+             vectorstore = ""
+     else:
+         vectorstore = ""
+     vectorstore = os.path.join("application", vectorstore)
+     return vectorstore
+
+
+ def is_azure_configured():
+     return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
+
+
+ def complete_stream(question, docsearch, chat_history, api_key, prompt_id, conversation_id):
+     llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
+
+     if prompt_id == 'default':
+         prompt = chat_combine_template
+     elif prompt_id == 'creative':
+         prompt = chat_combine_creative
+     elif prompt_id == 'strict':
+         prompt = chat_combine_strict
+     else:
+         prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
+
+     docs = docsearch.search(question, k=2)
+     if settings.LLM_NAME == "llama.cpp":
+         docs = [docs[0]]
+     # join all page_content together with a newline
+     docs_together = "\n".join([doc.page_content for doc in docs])
+     p_chat_combine = prompt.replace("{summaries}", docs_together)
+     messages_combine = [{"role": "system", "content": p_chat_combine}]
+     source_log_docs = []
+     for doc in docs:
+         if doc.metadata:
+             source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
+         else:
+             source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
+
+     if len(chat_history) > 1:
+         tokens_current_history = 0
+         # count tokens in history
+         chat_history.reverse()
+         for i in chat_history:
+             if "prompt" in i and "response" in i:
+                 tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
+                 if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                     tokens_current_history += tokens_batch
+                     messages_combine.append({"role": "user", "content": i["prompt"]})
+                     messages_combine.append({"role": "system", "content": i["response"]})
+     messages_combine.append({"role": "user", "content": question})
+
+     response_full = ""
+     completion = llm.gen_stream(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
+                                 messages=messages_combine)
+     for line in completion:
+         data = json.dumps({"answer": str(line)})
+         response_full += str(line)
+         yield f"data: {data}\n\n"
+
+     # save conversation to database
+     if conversation_id is not None:
+         conversations_collection.update_one(
+             {"_id": ObjectId(conversation_id)},
+             {"$push": {"queries": {"prompt": question, "response": response_full, "sources": source_log_docs}}},
+         )
+
+     else:
+         # create new conversation
+         # generate summary
+         messages_summary = [{"role": "assistant", "content": "Summarise following conversation in no more than 3 "
+                                                               "words, respond ONLY with the summary, use the same "
+                                                               "language as the system \n\nUser: " + question + "\n\n" +
+                                                               "AI: " +
+                                                               response_full},
+                             {"role": "user", "content": "Summarise following conversation in no more than 3 words, "
+                                                         "respond ONLY with the summary, use the same language as the "
+                                                         "system"}]
+
+         completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
+                              messages=messages_summary, max_tokens=30)
+         conversation_id = conversations_collection.insert_one(
+             {"user": "local",
+              "date": datetime.datetime.utcnow(),
+              "name": completion,
+              "queries": [{"prompt": question, "response": response_full, "sources": source_log_docs}]}
+         ).inserted_id
+
+     # send data.type = "end" to indicate that the stream has ended as json
+     data = json.dumps({"type": "id", "id": str(conversation_id)})
+     yield f"data: {data}\n\n"
+     data = json.dumps({"type": "end"})
+     yield f"data: {data}\n\n"
+
+
+ @answer.route("/stream", methods=["POST"])
+ def stream():
+     data = request.get_json()
+     # get parameter from url question
+     question = data["question"]
+     history = data["history"]
+     # history to json object from string
+     history = json.loads(history)
+     conversation_id = data["conversation_id"]
+     if 'prompt_id' in data:
+         prompt_id = data["prompt_id"]
+     else:
+         prompt_id = 'default'
+
+     # check if active_docs is set
+
+     if not api_key_set:
+         api_key = data["api_key"]
+     else:
+         api_key = settings.API_KEY
+     if not embeddings_key_set:
+         embeddings_key = data["embeddings_key"]
+     else:
+         embeddings_key = settings.EMBEDDINGS_KEY
+     if "active_docs" in data:
+         vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
+     else:
+         vectorstore = ""
+     docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
+
+     return Response(
+         complete_stream(question, docsearch,
+                         chat_history=history, api_key=api_key,
+                         prompt_id=prompt_id,
+                         conversation_id=conversation_id), mimetype="text/event-stream"
+     )
+
+
+ @answer.route("/api/answer", methods=["POST"])
+ def api_answer():
+     data = request.get_json()
+     question = data["question"]
+     history = data["history"]
+     if "conversation_id" not in data:
+         conversation_id = None
+     else:
+         conversation_id = data["conversation_id"]
+     print("-" * 5)
+     if not api_key_set:
+         api_key = data["api_key"]
+     else:
+         api_key = settings.API_KEY
+     if not embeddings_key_set:
+         embeddings_key = data["embeddings_key"]
+     else:
+         embeddings_key = settings.EMBEDDINGS_KEY
+     if 'prompt_id' in data:
+         prompt_id = data["prompt_id"]
+     else:
+         prompt_id = 'default'
+
+     if prompt_id == 'default':
+         prompt = chat_combine_template
+     elif prompt_id == 'creative':
+         prompt = chat_combine_creative
+     elif prompt_id == 'strict':
+         prompt = chat_combine_strict
+     else:
+         prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
+
+     # use try and except to check for exception
+     try:
+         # check if the vectorstore is set
+         vectorstore = get_vectorstore(data)
+         # loading the index and the store and the prompt template
+         # Note if you have used other embeddings than OpenAI, you need to change the embeddings
+         docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
+
+         llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
+
+         docs = docsearch.search(question, k=2)
+         # join all page_content together with a newline
+         docs_together = "\n".join([doc.page_content for doc in docs])
+         p_chat_combine = prompt.replace("{summaries}", docs_together)
+         messages_combine = [{"role": "system", "content": p_chat_combine}]
+         source_log_docs = []
+         for doc in docs:
+             if doc.metadata:
+                 source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
+             else:
+                 source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
+
+         if len(history) > 1:
+             tokens_current_history = 0
+             # count tokens in history
+             history.reverse()
+             for i in history:
+                 if "prompt" in i and "response" in i:
+                     tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
+                     if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                         tokens_current_history += tokens_batch
+                         messages_combine.append({"role": "user", "content": i["prompt"]})
+                         messages_combine.append({"role": "system", "content": i["response"]})
+         messages_combine.append({"role": "user", "content": question})
+
+         completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
+                              messages=messages_combine)
+
+         result = {"answer": completion, "sources": source_log_docs}
+         logger.debug(result)
+
+         # generate conversationId
+         if conversation_id is not None:
+             conversations_collection.update_one(
+                 {"_id": ObjectId(conversation_id)},
+                 {"$push": {"queries": {"prompt": question,
+                                        "response": result["answer"], "sources": result['sources']}}},
+             )
+
+         else:
+             # create new conversation
+             # generate summary
+             messages_summary = [
+                 {"role": "assistant", "content": "Summarise following conversation in no more than 3 words, "
+                                                  "respond ONLY with the summary, use the same language as the system \n\n"
+                                                  "User: " + question + "\n\n" + "AI: " + result["answer"]},
+                 {"role": "user", "content": "Summarise following conversation in no more than 3 words, "
+                                             "respond ONLY with the summary, use the same language as the system"}
+             ]
+
+             completion = llm.gen(
+                 model=gpt_model,
+                 engine=settings.AZURE_DEPLOYMENT_NAME,
+                 messages=messages_summary,
+                 max_tokens=30
+             )
+             conversation_id = conversations_collection.insert_one(
+                 {"user": "local",
+                  "date": datetime.datetime.utcnow(),
+                  "name": completion,
+                  "queries": [{"prompt": question, "response": result["answer"], "sources": source_log_docs}]}
+             ).inserted_id
+
+         result["conversation_id"] = str(conversation_id)
+
+         # mock result
+         # result = {
+         #     "answer": "The answer is 42",
+         #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
+         # }
+         return result
+     except Exception as e:
+         # print whole traceback
+         traceback.print_exc()
+         print(str(e))
+         return bad_request(500, str(e))
+
+
+ @answer.route("/api/search", methods=["POST"])
+ def api_search():
+     data = request.get_json()
+     # get parameter from url question
+     question = data["question"]
+
+     if not embeddings_key_set:
+         embeddings_key = data["embeddings_key"]
+     else:
+         embeddings_key = settings.EMBEDDINGS_KEY
+     if "active_docs" in data:
+         vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
+     else:
+         vectorstore = ""
+     docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
+
+     docs = docsearch.search(question, k=2)
+
+     source_log_docs = []
+     for doc in docs:
+         if doc.metadata:
+             source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
+         else:
+             source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
+     #yield f"data:{data}\n\n"
+     return source_log_docs
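Note: a minimal client sketch for the routes above (not part of the commit; it assumes the backend is reachable on http://localhost:7091 and that API_KEY and EMBEDDINGS_KEY are set in settings, so no keys are sent in the request body):

    import json
    import requests

    BASE = "http://localhost:7091"  # port exposed by the Dockerfile above

    # /api/answer returns a JSON dict with "answer", "sources" and "conversation_id".
    resp = requests.post(f"{BASE}/api/answer", json={
        "question": "What is DocsGPT?",
        "history": [],              # api_answer() treats history as a list of dicts
        "conversation_id": None,
        "active_docs": "default",
    })
    print(resp.json()["answer"])

    # /stream emits Server-Sent Events: lines of the form "data: {...}".
    with requests.post(f"{BASE}/stream", json={
        "question": "What is DocsGPT?",
        "history": json.dumps([]),  # stream() json.loads() the history string
        "conversation_id": None,
        "active_docs": "default",
    }, stream=True) as r:
        for line in r.iter_lines():
            if line.startswith(b"data: "):
                print(json.loads(line[len(b"data: "):]))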
api/internal/__init__.py ADDED
File without changes
api/internal/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes).
api/internal/__pycache__/routes.cpython-310.pyc ADDED
Binary file (2.07 kB).
api/internal/routes.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ import datetime
+ from flask import Blueprint, request, send_from_directory
+ from pymongo import MongoClient
+ from werkzeug.utils import secure_filename
+
+ from application.core.settings import settings
+ mongo = MongoClient(settings.MONGO_URI)
+ db = mongo["docsgpt"]
+ conversations_collection = db["conversations"]
+ vectors_collection = db["vectors"]
+
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+ internal = Blueprint('internal', __name__)
+
+ @internal.route("/api/download", methods=["get"])
+ def download_file():
+     user = secure_filename(request.args.get("user"))
+     job_name = secure_filename(request.args.get("name"))
+     filename = secure_filename(request.args.get("file"))
+     save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
+     return send_from_directory(save_dir, filename, as_attachment=True)
+
+
+ @internal.route("/api/upload_index", methods=["POST"])
+ def upload_index_files():
+     """Upload two files (index.faiss, index.pkl) to the user's folder."""
+     if "user" not in request.form:
+         return {"status": "no user"}
+     user = secure_filename(request.form["user"])
+     if "name" not in request.form:
+         return {"status": "no name"}
+     job_name = secure_filename(request.form["name"])
+     save_dir = os.path.join(current_dir, "indexes", user, job_name)
+     if settings.VECTOR_STORE == "faiss":
+         if "file_faiss" not in request.files:
+             print("No file part")
+             return {"status": "no file"}
+         file_faiss = request.files["file_faiss"]
+         if file_faiss.filename == "":
+             return {"status": "no file name"}
+         if "file_pkl" not in request.files:
+             print("No file part")
+             return {"status": "no file"}
+         file_pkl = request.files["file_pkl"]
+         if file_pkl.filename == "":
+             return {"status": "no file name"}
+         # saves index files
+
+         if not os.path.exists(save_dir):
+             os.makedirs(save_dir)
+         file_faiss.save(os.path.join(save_dir, "index.faiss"))
+         file_pkl.save(os.path.join(save_dir, "index.pkl"))
+     # create entry in vectors_collection
+     vectors_collection.insert_one(
+         {
+             "user": user,
+             "name": job_name,
+             "language": job_name,
+             "location": save_dir,
+             "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
+             "model": settings.EMBEDDINGS_NAME,
+             "type": "local",
+         }
+     )
+     return {"status": "ok"}
api/user/__init__.py ADDED
File without changes
api/user/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes).
api/user/__pycache__/routes.cpython-310.pyc ADDED
Binary file (8.12 kB).
api/user/__pycache__/tasks.cpython-310.pyc ADDED
Binary file (466 Bytes).
api/user/routes.py ADDED
@@ -0,0 +1,321 @@
+ import os
+ from flask import Blueprint, request, jsonify
+ import requests
+ from pymongo import MongoClient
+ from bson.objectid import ObjectId
+ from werkzeug.utils import secure_filename
+
+ from application.api.user.tasks import ingest
+
+ from application.core.settings import settings
+ from application.vectorstore.vector_creator import VectorCreator
+
+ mongo = MongoClient(settings.MONGO_URI)
+ db = mongo["docsgpt"]
+ conversations_collection = db["conversations"]
+ vectors_collection = db["vectors"]
+ prompts_collection = db["prompts"]
+ feedback_collection = db["feedback"]
+ user = Blueprint('user', __name__)
+
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+ @user.route("/api/delete_conversation", methods=["POST"])
+ def delete_conversation():
+     # deletes a conversation from the database
+     conversation_id = request.args.get("id")
+     # write to mongodb
+     conversations_collection.delete_one(
+         {
+             "_id": ObjectId(conversation_id),
+         }
+     )
+
+     return {"status": "ok"}
+
+ @user.route("/api/get_conversations", methods=["get"])
+ def get_conversations():
+     # provides a list of conversations
+     conversations = conversations_collection.find().sort("date", -1)
+     list_conversations = []
+     for conversation in conversations:
+         list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
+
+     #list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
+
+     return jsonify(list_conversations)
+
+
+ @user.route("/api/get_single_conversation", methods=["get"])
+ def get_single_conversation():
+     # provides data for a conversation
+     conversation_id = request.args.get("id")
+     conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
+     return jsonify(conversation['queries'])
+
+ @user.route("/api/update_conversation_name", methods=["POST"])
+ def update_conversation_name():
+     # update data for a conversation
+     data = request.get_json()
+     id = data["id"]
+     name = data["name"]
+     conversations_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name}})
+     return {"status": "ok"}
+
+
+ @user.route("/api/feedback", methods=["POST"])
+ def api_feedback():
+     data = request.get_json()
+     question = data["question"]
+     answer = data["answer"]
+     feedback = data["feedback"]
+
+     feedback_collection.insert_one(
+         {
+             "question": question,
+             "answer": answer,
+             "feedback": feedback,
+         }
+     )
+     return {"status": "ok"}
+
+ @user.route("/api/delete_by_ids", methods=["get"])
+ def delete_by_ids():
+     """Delete by ID. These are the IDs in the vectorstore"""
+
+     ids = request.args.get("path")
+     if not ids:
+         return {"status": "error"}
+
+     if settings.VECTOR_STORE == "faiss":
+         result = vectors_collection.delete_index(ids=ids)
+         if result:
+             return {"status": "ok"}
+     return {"status": "error"}
+
+ @user.route("/api/delete_old", methods=["get"])
+ def delete_old():
+     """Delete old indexes."""
+     import shutil
+
+     path = request.args.get("path")
+     dirs = path.split("/")
+     dirs_clean = []
+     for i in range(0, len(dirs)):
+         dirs_clean.append(secure_filename(dirs[i]))
+     # check that path starts with indexes or vectors
+
+     if dirs_clean[0] not in ["indexes", "vectors"]:
+         return {"status": "error"}
+     path_clean = "/".join(dirs_clean)
+     vectors_collection.delete_one({"name": dirs_clean[-1], 'user': dirs_clean[-2]})
+     if settings.VECTOR_STORE == "faiss":
+         try:
+             shutil.rmtree(os.path.join(current_dir, path_clean))
+         except FileNotFoundError:
+             pass
+     else:
+         vetorstore = VectorCreator.create_vectorstore(
+             settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
+         )
+         vetorstore.delete_index()
+
+     return {"status": "ok"}
+
+ @user.route("/api/upload", methods=["POST"])
+ def upload_file():
+     """Upload a file to get vectorized and indexed."""
+     if "user" not in request.form:
+         return {"status": "no user"}
+     user = secure_filename(request.form["user"])
+     if "name" not in request.form:
+         return {"status": "no name"}
+     job_name = secure_filename(request.form["name"])
+     # check if the post request has the file part
+     if "file" not in request.files:
+         print("No file part")
+         return {"status": "no file"}
+     file = request.files["file"]
+     if file.filename == "":
+         return {"status": "no file name"}
+
+     if file:
+         filename = secure_filename(file.filename)
+         # save dir
+         save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
+         # create dir if not exists
+         if not os.path.exists(save_dir):
+             os.makedirs(save_dir)
+
+         file.save(os.path.join(save_dir, filename))
+         task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt", ".docx",
+                                                      ".csv", ".epub", ".html", ".mdx"],
+                             job_name, filename, user)
+         # task id
+         task_id = task.id
+         return {"status": "ok", "task_id": task_id}
+     else:
+         return {"status": "error"}
+
+ @user.route("/api/task_status", methods=["GET"])
+ def task_status():
+     """Get celery job status."""
+     task_id = request.args.get("task_id")
+     from application.celery import celery
+     task = celery.AsyncResult(task_id)
+     task_meta = task.info
+     return {"status": task.status, "result": task_meta}
+
+
+ @user.route("/api/combine", methods=["GET"])
+ def combined_json():
+     user = "local"
+     """Provide json file with combined available indexes."""
+     # get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
+
+     data = [
+         {
+             "name": "default",
+             "language": "default",
+             "version": "",
+             "description": "default",
+             "fullName": "default",
+             "date": "default",
+             "docLink": "default",
+             "model": settings.EMBEDDINGS_NAME,
+             "location": "remote",
+         }
+     ]
+     # structure: name, language, version, description, fullName, date, docLink
+     # append data from vectors_collection
+     for index in vectors_collection.find({"user": user}):
+         data.append(
+             {
+                 "name": index["name"],
+                 "language": index["language"],
+                 "version": "",
+                 "description": index["name"],
+                 "fullName": index["name"],
+                 "date": index["date"],
+                 "docLink": index["location"],
+                 "model": settings.EMBEDDINGS_NAME,
+                 "location": "local",
+             }
+         )
+     if settings.VECTOR_STORE == "faiss":
+         data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
+         for index in data_remote:
+             index["location"] = "remote"
+             data.append(index)
+
+     return jsonify(data)
+
+
+ @user.route("/api/docs_check", methods=["POST"])
+ def check_docs():
+     # check if docs exist in a vectorstore folder
+     data = request.get_json()
+     # split docs on / and take first part
+     if data["docs"].split("/")[0] == "local":
+         return {"status": "exists"}
+     vectorstore = "vectors/" + data["docs"]
+     base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
+     if os.path.exists(vectorstore) or data["docs"] == "default":
+         return {"status": "exists"}
+     else:
+         r = requests.get(base_path + vectorstore + "index.faiss")
+
+         if r.status_code != 200:
+             return {"status": "null"}
+         else:
+             if not os.path.exists(vectorstore):
+                 os.makedirs(vectorstore)
+             with open(vectorstore + "index.faiss", "wb") as f:
+                 f.write(r.content)
+
+             # download the store
+             r = requests.get(base_path + vectorstore + "index.pkl")
+             with open(vectorstore + "index.pkl", "wb") as f:
+                 f.write(r.content)
+
+         return {"status": "loaded"}
+
+ @user.route("/api/create_prompt", methods=["POST"])
+ def create_prompt():
+     data = request.get_json()
+     content = data["content"]
+     name = data["name"]
+     if name == "":
+         return {"status": "error"}
+     user = "local"
+     resp = prompts_collection.insert_one(
+         {
+             "name": name,
+             "content": content,
+             "user": user,
+         }
+     )
+     new_id = str(resp.inserted_id)
+     return {"id": new_id}
+
+ @user.route("/api/get_prompts", methods=["GET"])
+ def get_prompts():
+     user = "local"
+     prompts = prompts_collection.find({"user": user})
+     list_prompts = []
+     list_prompts.append({"id": "default", "name": "default", "type": "public"})
+     list_prompts.append({"id": "creative", "name": "creative", "type": "public"})
+     list_prompts.append({"id": "strict", "name": "strict", "type": "public"})
+     for prompt in prompts:
+         list_prompts.append({"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"})
+
+     return jsonify(list_prompts)
+
+ @user.route("/api/get_single_prompt", methods=["GET"])
+ def get_single_prompt():
+     prompt_id = request.args.get("id")
+     if prompt_id == 'default':
+         with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
+             chat_combine_template = f.read()
+         return jsonify({"content": chat_combine_template})
+     elif prompt_id == 'creative':
+         with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
+             chat_reduce_creative = f.read()
+         return jsonify({"content": chat_reduce_creative})
+     elif prompt_id == 'strict':
+         with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
+             chat_reduce_strict = f.read()
+         return jsonify({"content": chat_reduce_strict})
+
+     prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})
+     return jsonify({"content": prompt["content"]})
+
+ @user.route("/api/delete_prompt", methods=["POST"])
+ def delete_prompt():
+     data = request.get_json()
+     id = data["id"]
+     prompts_collection.delete_one(
+         {
+             "_id": ObjectId(id),
+         }
+     )
+     return {"status": "ok"}
+
+ @user.route("/api/update_prompt", methods=["POST"])
+ def update_prompt_name():
+     data = request.get_json()
+     id = data["id"]
+     name = data["name"]
+     content = data["content"]
+     # check if name is null
+     if name == "":
+         return {"status": "error"}
+     prompts_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}})
+     return {"status": "ok"}
+
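Note: a sketch of the upload-then-poll flow exposed by /api/upload and /api/task_status (not part of the commit; it assumes a running Redis broker and a celery worker so that ingest.delay() is actually consumed):

    import time
    import requests

    BASE = "http://localhost:7091"

    with open("patil2016.pdf", "rb") as f:
        r = requests.post(f"{BASE}/api/upload",
                          data={"user": "local", "name": "patil2016.pdf"},
                          files={"file": f})
    task_id = r.json()["task_id"]

    # Poll the celery task until the worker reports a final state.
    while True:
        status = requests.get(f"{BASE}/api/task_status",
                              params={"task_id": task_id}).json()
        if status["status"] in ("SUCCESS", "FAILURE"):
            break
        time.sleep(2)
    print(status)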
api/user/tasks.py ADDED
@@ -0,0 +1,7 @@
+ from application.worker import ingest_worker
+ from application.celery import celery
+
+ @celery.task(bind=True)
+ def ingest(self, directory, formats, name_job, filename, user):
+     resp = ingest_worker(self, directory, formats, name_job, filename, user)
+     return resp
app.py ADDED
@@ -0,0 +1,44 @@
+ import platform
+ import dotenv
+ from application.celery import celery
+ from flask import Flask, request, redirect
+ from application.core.settings import settings
+ from application.api.user.routes import user
+ from application.api.answer.routes import answer
+ from application.api.internal.routes import internal
+
+ if platform.system() == "Windows":
+     import pathlib
+     pathlib.PosixPath = pathlib.WindowsPath
+
+ dotenv.load_dotenv()
+
+ app = Flask(__name__)
+ app.register_blueprint(user)
+ app.register_blueprint(answer)
+ app.register_blueprint(internal)
+ app.config.update(
+     UPLOAD_FOLDER="inputs",
+     CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
+     CELERY_RESULT_BACKEND=settings.CELERY_RESULT_BACKEND,
+     MONGO_URI=settings.MONGO_URI
+ )
+ celery.config_from_object("application.celeryconfig")
+
+ @app.route("/")
+ def home():
+     if request.remote_addr in ('0.0.0.0', '127.0.0.1', 'localhost', '172.18.0.1'):
+         return redirect('http://localhost:5173')
+     else:
+         return 'Welcome to DocsGPT Backend!'
+
+ @app.after_request
+ def after_request(response):
+     response.headers.add("Access-Control-Allow-Origin", "*")
+     response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization")
+     response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
+     return response
+
+ if __name__ == "__main__":
+     app.run(debug=True, port=7091)
celery.py ADDED
@@ -0,0 +1,9 @@
+ from celery import Celery
+ from application.core.settings import settings
+
+ def make_celery(app_name=__name__):
+     celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
+     celery.conf.update(settings)
+     return celery
+
+ celery = make_celery()
celeryconfig.py ADDED
@@ -0,0 +1,8 @@
+ import os
+
+ broker_url = os.getenv("CELERY_BROKER_URL")
+ result_backend = os.getenv("CELERY_RESULT_BACKEND")
+
+ task_serializer = 'json'
+ result_serializer = 'json'
+ accept_content = ['json']
core/__init__.py ADDED
File without changes
core/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (155 Bytes).
core/__pycache__/settings.cpython-310.pyc ADDED
Binary file (1.92 kB).
core/settings.py ADDED
@@ -0,0 +1,44 @@
+ from pathlib import Path
+ from typing import Optional
+ import os
+
+ from pydantic_settings import BaseSettings
+ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+ class Settings(BaseSettings):
+     LLM_NAME: str = "docsgpt"
+     EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
+     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
+     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
+     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
+     MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
+     TOKENS_MAX_HISTORY: int = 150
+     UPLOAD_FOLDER: str = "inputs"
+     VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch"
+
+     API_URL: str = "http://localhost:7091"  # backend url for celery worker
+
+     API_KEY: Optional[str] = None  # LLM api key
+     EMBEDDINGS_KEY: Optional[str] = None  # api key for embeddings (if using openai, just copy API_KEY)
+     OPENAI_API_BASE: Optional[str] = None  # azure openai api base url
+     OPENAI_API_VERSION: Optional[str] = None  # azure openai api version
+     AZURE_DEPLOYMENT_NAME: Optional[str] = None  # azure deployment name for answering
+     AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None  # azure deployment name for embeddings
+
+     # elasticsearch
+     ELASTIC_CLOUD_ID: Optional[str] = None  # cloud id for elasticsearch
+     ELASTIC_USERNAME: Optional[str] = None  # username for elasticsearch
+     ELASTIC_PASSWORD: Optional[str] = None  # password for elasticsearch
+     ELASTIC_URL: Optional[str] = None  # url for elasticsearch
+     ELASTIC_INDEX: Optional[str] = "docsgpt"  # index name for elasticsearch
+
+     # SageMaker config
+     SAGEMAKER_ENDPOINT: Optional[str] = None  # SageMaker endpoint name
+     SAGEMAKER_REGION: Optional[str] = None  # SageMaker region name
+     SAGEMAKER_ACCESS_KEY: Optional[str] = None  # SageMaker access key
+     SAGEMAKER_SECRET_KEY: Optional[str] = None  # SageMaker secret key
+
+
+ path = Path(__file__).parent.parent.absolute()
+ settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
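Note: Settings is a plain pydantic BaseSettings class, so every field above can also be overridden through the .env file or environment variables; a short sketch (not part of the commit, values are illustrative only):

    import os
    from application.core.settings import Settings

    # Environment variables take precedence over the defaults declared above.
    os.environ["LLM_NAME"] = "openai"
    os.environ["API_KEY"] = "sk-..."  # placeholder, not a real key
    os.environ["VECTOR_STORE"] = "faiss"

    settings = Settings()
    print(settings.LLM_NAME, settings.VECTOR_STORE)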
error.py ADDED
@@ -0,0 +1,15 @@
+ from flask import jsonify
+ from werkzeug.http import HTTP_STATUS_CODES
+
+
+ def response_error(code_status, message=None):
+     payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
+     if message:
+         payload['message'] = message
+     response = jsonify(payload)
+     response.status_code = code_status
+     return response
+
+
+ def bad_request(status_code=400, message=''):
+     return response_error(code_status=status_code, message=message)
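Note: a small sketch (not part of the commit) of how bad_request() is meant to be used from a route:

    from flask import Flask
    from application.error import bad_request

    app = Flask(__name__)

    @app.route("/api/fail")
    def fail():
        # Responds with status 500 and a JSON body such as
        # {"error": "Internal Server Error", "message": "something broke"}
        return bad_request(500, "something broke")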
index.faiss ADDED
Binary file (9.26 kB).
index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1653826159295b5a262df5228ec9678a919a9fcc3ff94248eeaa55f434c071ef
+ size 7866
indexes/local/patil2016.pdf/index.faiss ADDED
Binary file (15.4 kB).
indexes/local/patil2016.pdf/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccc1aa0edd32b66234b113edba42b67f5fc498851e584863124f44abf3920273
+ size 28255
inputs/local/patil2016.pdf/patil2016.pdf ADDED
Binary file (280 kB).
llm/__init__.py ADDED
File without changes
llm/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes).
llm/__pycache__/anthropic.cpython-310.pyc ADDED
Binary file (1.65 kB).
llm/__pycache__/base.cpython-310.pyc ADDED
Binary file (734 Bytes).
llm/__pycache__/docsgpt_provider.cpython-310.pyc ADDED
Binary file (1.59 kB).
llm/__pycache__/huggingface.cpython-310.pyc ADDED
Binary file (1.81 kB).
llm/__pycache__/llama_cpp.cpython-310.pyc ADDED
Binary file (1.58 kB).
llm/__pycache__/llm_creator.cpython-310.pyc ADDED
Binary file (1.15 kB).
llm/__pycache__/openai.cpython-310.pyc ADDED
Binary file (2.16 kB).
llm/__pycache__/sagemaker.cpython-310.pyc ADDED
Binary file (4.33 kB).
llm/anthropic.py ADDED
@@ -0,0 +1,40 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+
+ class AnthropicLLM(BaseLLM):
+
+     def __init__(self, api_key=None):
+         from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+         self.api_key = api_key or settings.ANTHROPIC_API_KEY  # If not provided, use a default from settings
+         self.anthropic = Anthropic(api_key=self.api_key)
+         self.HUMAN_PROMPT = HUMAN_PROMPT
+         self.AI_PROMPT = AI_PROMPT
+
+     def gen(self, model, messages, engine=None, max_tokens=300, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Context \n {context} \n ### Question \n {user_question}"
+         if stream:
+             return self.gen_stream(model, prompt, max_tokens, **kwargs)
+
+         completion = self.anthropic.completions.create(
+             model=model,
+             max_tokens_to_sample=max_tokens,
+             stream=stream,
+             prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
+         )
+         return completion.completion
+
+     def gen_stream(self, model, messages, engine=None, max_tokens=300, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Context \n {context} \n ### Question \n {user_question}"
+         stream_response = self.anthropic.completions.create(
+             model=model,
+             prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
+             max_tokens_to_sample=max_tokens,
+             stream=True,
+         )
+
+         for completion in stream_response:
+             yield completion.completion
llm/base.py ADDED
@@ -0,0 +1,14 @@
+ from abc import ABC, abstractmethod
+
+
+ class BaseLLM(ABC):
+     def __init__(self):
+         pass
+
+     @abstractmethod
+     def gen(self, *args, **kwargs):
+         pass
+
+     @abstractmethod
+     def gen_stream(self, *args, **kwargs):
+         pass
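Note: any new provider only has to implement these two methods; a toy subclass (not part of the commit) that echoes the last user message back, useful for wiring tests:

    from application.llm.base import BaseLLM

    class EchoLLM(BaseLLM):
        def gen(self, model, engine, messages, stream=False, **kwargs):
            # Return the last user message unchanged.
            return messages[-1]["content"]

        def gen_stream(self, model, engine, messages, stream=True, **kwargs):
            # Yield the last user message word by word, mimicking token streaming.
            for token in messages[-1]["content"].split():
                yield token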
llm/docsgpt_provider.py ADDED
@@ -0,0 +1,49 @@
+ from application.llm.base import BaseLLM
+ import json
+ import requests
+
+ class DocsGPTAPILLM(BaseLLM):
+
+     def __init__(self, *args, **kwargs):
+         self.endpoint = "https://llm.docsgpt.co.uk"
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         response = requests.post(
+             f"{self.endpoint}/answer",
+             json={
+                 "prompt": prompt,
+                 "max_new_tokens": 30
+             }
+         )
+         response_clean = response.json()['a'].split("###")[0]
+
+         return response_clean
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         # send prompt to endpoint /stream
+         response = requests.post(
+             f"{self.endpoint}/stream",
+             json={
+                 "prompt": prompt,
+                 "max_new_tokens": 256
+             },
+             stream=True
+         )
+
+         for line in response.iter_lines():
+             if line:
+                 #data = json.loads(line)
+                 data_str = line.decode('utf-8')
+                 if data_str.startswith("data: "):
+                     data = json.loads(data_str[6:])
+                     yield data['a']
llm/huggingface.py ADDED
@@ -0,0 +1,44 @@
+ from application.llm.base import BaseLLM
+
+ class HuggingFaceLLM(BaseLLM):
+
+     def __init__(self, api_key, llm_name='Arc53/DocsGPT-7B', q=False):
+         global hf
+
+         from langchain.llms import HuggingFacePipeline
+         if q:
+             import torch
+             from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
+             tokenizer = AutoTokenizer.from_pretrained(llm_name)
+             bnb_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_use_double_quant=True,
+                 bnb_4bit_quant_type="nf4",
+                 bnb_4bit_compute_dtype=torch.bfloat16
+             )
+             model = AutoModelForCausalLM.from_pretrained(llm_name, quantization_config=bnb_config)
+         else:
+             from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+             tokenizer = AutoTokenizer.from_pretrained(llm_name)
+             model = AutoModelForCausalLM.from_pretrained(llm_name)
+
+         pipe = pipeline(
+             "text-generation", model=model,
+             tokenizer=tokenizer, max_new_tokens=2000,
+             device_map="auto", eos_token_id=tokenizer.eos_token_id
+         )
+         hf = HuggingFacePipeline(pipeline=pipe)
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         result = hf(prompt)
+
+         return result.content
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+
+         raise NotImplementedError("HuggingFaceLLM Streaming is not implemented yet.")
llm/llama_cpp.py ADDED
@@ -0,0 +1,39 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+
+ class LlamaCpp(BaseLLM):
+
+     def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
+         global llama
+         try:
+             from llama_cpp import Llama
+         except ImportError:
+             raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
+
+         llama = Llama(model_path=llm_name, n_ctx=2048)
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         result = llama(prompt, max_tokens=150, echo=False)
+
+         # import sys
+         # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
+
+         return result['choices'][0]['text'].split('### Answer \n')[-1]
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         result = llama(prompt, max_tokens=150, echo=False, stream=stream)
+
+         # import sys
+         # print(list(result), file=sys.stderr)
+
+         for item in result:
+             for choice in item['choices']:
+                 yield choice['text']
llm/llm_creator.py ADDED
@@ -0,0 +1,26 @@
+ from application.llm.openai import OpenAILLM, AzureOpenAILLM
+ from application.llm.sagemaker import SagemakerAPILLM
+ from application.llm.huggingface import HuggingFaceLLM
+ from application.llm.llama_cpp import LlamaCpp
+ from application.llm.anthropic import AnthropicLLM
+ from application.llm.docsgpt_provider import DocsGPTAPILLM
+
+
+ class LLMCreator:
+     llms = {
+         'openai': OpenAILLM,
+         'azure_openai': AzureOpenAILLM,
+         'sagemaker': SagemakerAPILLM,
+         'huggingface': HuggingFaceLLM,
+         'llama.cpp': LlamaCpp,
+         'anthropic': AnthropicLLM,
+         'docsgpt': DocsGPTAPILLM
+     }
+
+     @classmethod
+     def create_llm(cls, type, *args, **kwargs):
+         llm_class = cls.llms.get(type.lower())
+         if not llm_class:
+             raise ValueError(f"No LLM class found for type {type}")
+         return llm_class(*args, **kwargs)
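Note: a usage sketch for the factory (not part of the commit; the 'docsgpt' provider needs network access to llm.docsgpt.co.uk):

    from application.llm.llm_creator import LLMCreator

    # The lookup key mirrors settings.LLM_NAME; unknown names raise ValueError.
    llm = LLMCreator.create_llm("docsgpt")
    answer = llm.gen(model=None, engine=None,
                     messages=[{"role": "system", "content": "Some context."},
                               {"role": "user", "content": "What is DocsGPT?"}])
    print(answer)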
llm/openai.py ADDED
@@ -0,0 +1,60 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+
+ class OpenAILLM(BaseLLM):
+
+     def __init__(self, api_key):
+         global openai
+         from openai import OpenAI
+
+         self.client = OpenAI(
+             api_key=api_key,
+         )
+         self.api_key = api_key
+
+     def _get_openai(self):
+         # Import openai when needed
+         import openai
+
+         return openai
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         response = self.client.chat.completions.create(model=model,
+                                                         messages=messages,
+                                                         stream=stream,
+                                                         **kwargs)
+
+         return response.choices[0].message.content
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         response = self.client.chat.completions.create(model=model,
+                                                         messages=messages,
+                                                         stream=stream,
+                                                         **kwargs)
+
+         for line in response:
+             # import sys
+             # print(line.choices[0].delta.content, file=sys.stderr)
+             if line.choices[0].delta.content is not None:
+                 yield line.choices[0].delta.content
+
+
+ class AzureOpenAILLM(OpenAILLM):
+
+     def __init__(self, openai_api_key, openai_api_base, openai_api_version, deployment_name):
+         super().__init__(openai_api_key)
+         self.api_base = settings.OPENAI_API_BASE,
+         self.api_version = settings.OPENAI_API_VERSION,
+         self.deployment_name = settings.AZURE_DEPLOYMENT_NAME,
+         from openai import AzureOpenAI
+         self.client = AzureOpenAI(
+             api_key=openai_api_key,
+             api_version=settings.OPENAI_API_VERSION,
+             api_base=settings.OPENAI_API_BASE,
+             deployment_name=settings.AZURE_DEPLOYMENT_NAME,
+         )
+
+     def _get_openai(self):
+         openai = super()._get_openai()
+
+         return openai
llm/sagemaker.py ADDED
@@ -0,0 +1,139 @@
+ from application.llm.base import BaseLLM
+ from application.core.settings import settings
+ import json
+ import io
+
+
+ class LineIterator:
+     """
+     A helper class for parsing the byte stream input.
+
+     The output of the model will be in the following format:
+     ```
+     b'{"outputs": [" a"]}\n'
+     b'{"outputs": [" challenging"]}\n'
+     b'{"outputs": [" problem"]}\n'
+     ...
+     ```
+
+     While usually each PayloadPart event from the event stream will contain a byte array
+     with a full json, this is not guaranteed and some of the json objects may be split across
+     PayloadPart events. For example:
+     ```
+     {'PayloadPart': {'Bytes': b'{"outputs": '}}
+     {'PayloadPart': {'Bytes': b'[" problem"]}\n'}}
+     ```
+
+     This class accounts for this by concatenating bytes written via the 'write' function
+     and then exposing a method which will return lines (ending with a '\n' character) within
+     the buffer via the 'scan_lines' function. It maintains the position of the last read
+     position to ensure that previous bytes are not exposed again.
+     """
+
+     def __init__(self, stream):
+         self.byte_iterator = iter(stream)
+         self.buffer = io.BytesIO()
+         self.read_pos = 0
+
+     def __iter__(self):
+         return self
+
+     def __next__(self):
+         while True:
+             self.buffer.seek(self.read_pos)
+             line = self.buffer.readline()
+             if line and line[-1] == ord('\n'):
+                 self.read_pos += len(line)
+                 return line[:-1]
+             try:
+                 chunk = next(self.byte_iterator)
+             except StopIteration:
+                 if self.read_pos < self.buffer.getbuffer().nbytes:
+                     continue
+                 raise
+             if 'PayloadPart' not in chunk:
+                 print('Unknown event type:' + chunk)
+                 continue
+             self.buffer.seek(0, io.SEEK_END)
+             self.buffer.write(chunk['PayloadPart']['Bytes'])
+
+ class SagemakerAPILLM(BaseLLM):
+
+     def __init__(self, *args, **kwargs):
+         import boto3
+         runtime = boto3.client(
+             'runtime.sagemaker',
+             aws_access_key_id='xxx',
+             aws_secret_access_key='xxx',
+             region_name='us-west-2'
+         )
+
+         self.endpoint = settings.SAGEMAKER_ENDPOINT
+         self.runtime = runtime
+
+     def gen(self, model, engine, messages, stream=False, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         # Construct payload for endpoint
+         payload = {
+             "inputs": prompt,
+             "stream": False,
+             "parameters": {
+                 "do_sample": True,
+                 "temperature": 0.1,
+                 "max_new_tokens": 30,
+                 "repetition_penalty": 1.03,
+                 "stop": ["</s>", "###"]
+             }
+         }
+         body_bytes = json.dumps(payload).encode('utf-8')
+
+         # Invoke the endpoint
+         response = self.runtime.invoke_endpoint(EndpointName=self.endpoint,
+                                                 ContentType='application/json',
+                                                 Body=body_bytes)
+         result = json.loads(response['Body'].read().decode())
+         import sys
+         print(result[0]['generated_text'], file=sys.stderr)
+         return result[0]['generated_text'][len(prompt):]
+
+     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+         context = messages[0]['content']
+         user_question = messages[-1]['content']
+         prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+         # Construct payload for endpoint
+         payload = {
+             "inputs": prompt,
+             "stream": True,
+             "parameters": {
+                 "do_sample": True,
+                 "temperature": 0.1,
+                 "max_new_tokens": 512,
+                 "repetition_penalty": 1.03,
+                 "stop": ["</s>", "###"]
+             }
+         }
+         body_bytes = json.dumps(payload).encode('utf-8')
+
+         # Invoke the endpoint
+         response = self.runtime.invoke_endpoint_with_response_stream(EndpointName=self.endpoint,
+                                                                      ContentType='application/json',
+                                                                      Body=body_bytes)
+         #result = json.loads(response['Body'].read().decode())
+         event_stream = response['Body']
+         start_json = b'{'
+         for line in LineIterator(event_stream):
+             if line != b'' and start_json in line:
+                 #print(line)
+                 data = json.loads(line[line.find(start_json):].decode('utf-8'))
+                 if data['token']['text'] not in ["</s>", "###"]:
+                     print(data['token']['text'], end='')
+                     yield data['token']['text']
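Note: a self-contained sketch (not part of the commit) showing how LineIterator reassembles a JSON line that SageMaker split across two PayloadPart events:

    import json
    from application.llm.sagemaker import LineIterator

    # Fake event stream: one JSON line deliberately split across two events.
    fake_stream = [
        {"PayloadPart": {"Bytes": b'{"token": {"te'}},
        {"PayloadPart": {"Bytes": b'xt": " hello"}}\n'}},
    ]
    for line in LineIterator(fake_stream):
        data = json.loads(line.decode("utf-8"))
        print(data["token"]["text"])  # -> " hello"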
parser/__init__.py ADDED
@@ -0,0 +1 @@
+