Spaces:

momegas
/

pmi

Sleeping

App Files Files Community

momegas committed on Oct 20, 2023

Commit

033ca0b

•

1 Parent(s): f145d3d

Upload 2 files

Browse files

Files changed (2) hide show

app.py +114 -0
requirements.txt +141 -0

app.py ADDED Viewed

	@@ -0,0 +1,114 @@

+from ast import List
+from langchain.document_loaders import DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import dotenv
+from langchain.prompts import PromptTemplate
+import gradio as gr
+from langchain import PromptTemplate, LLMChain
+import requests
+from fastembed.embedding import FlagEmbedding as Embedding
+import numpy as np
+import os
+# Load variables from a local .env file (if one exists) into the process environment.
+dotenv.load_dotenv()
+# Bearer token for the inference endpoint; None when API_TOKEN is not set.
+api_token = os.environ.get("API_TOKEN")
+# URL that query() posts generation requests to.
+API_URL = "https://vpb8x4glbmizmiya.eu-west-1.aws.endpoints.huggingface.cloud"
+# HTTP headers attached to every request made by query().
+# NOTE(review): with API_TOKEN unset this sends "Bearer None" - confirm the deployment always provides it.
+headers = {
+    "Authorization": f"Bearer {api_token}",
+    "Content-Type": "application/json",
+}
+def query(payload, timeout=120):
+    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.
+    Args:
+        payload: JSON-serialisable request body for the inference endpoint.
+        timeout: Seconds to wait for the endpoint before giving up. Without a
+            timeout ``requests`` waits indefinitely, which would freeze the chat UI.
+    Returns:
+        The decoded JSON body. If the body is not valid JSON (for example an HTML
+        gateway error page), a dict ``{"error": ...}`` describing the HTTP status.
+    Raises:
+        requests.exceptions.RequestException: on connection failure or timeout.
+    """
+    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
+    try:
+        return response.json()
+    except ValueError:
+        # requests' JSON decode error subclasses ValueError; report it as an error payload.
+        return {"error": f"HTTP {response.status_code}: endpoint returned a non-JSON response"}
+def get_top_k(query_embedding, embeddings, documents, k=3):
+    """Return up to ``k`` documents whose embeddings score highest against the query.
+    The score is the plain dot product of each embedding with ``query_embedding``;
+    it equals cosine similarity only if the vectors are unit-normalised.
+    Args:
+        query_embedding: 1-D vector for the query.
+        embeddings: Sequence of vectors, one per entry of ``documents``, in the same order.
+        documents: Items to rank; the selected ones are returned unchanged.
+        k: Maximum number of results. Values larger than the corpus are clamped.
+    Returns:
+        A list of at most ``k`` documents, best match first. Empty when ``k <= 0``
+        or when there are no documents.
+    """
+    if k <= 0 or not documents:
+        return []
+    scores = np.dot(embeddings, query_embedding)
+    # argsort is ascending, so reverse it to get the best-scoring index first
+    ranked = np.argsort(scores)[::-1]
+    result = []
+    # Slicing clamps k to the number of scores, so a small corpus no longer raises IndexError
+    for rank, index in enumerate(ranked[:k], start=1):
+        print(f"Rank {rank}: {documents[index]}", "\n")
+        result.append(documents[index])
+    return result
+# Instruction text sent to the model. {context} receives the retrieved passages and
+# {question} the user's message. A stray unbalanced double quote that used to open the
+# "Use the following..." line has been removed so the model is not shown a dangling quote.
+prompt_template = """
+You are the helpful assistant representing the company Philip Morris.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+Use the following pieces of context to answer the question at the end.
+Context:
+{context}
+Question: {question}
+Answer:
+"""
+# Template object used to render the final prompt from the two variables above.
+PROMPT = PromptTemplate(
+    template=prompt_template, input_variables=["context", "question"]
+)
+# Build the retrieval corpus once at import time.
+# Collect every .txt file under ./documents (recursive glob), showing a progress bar.
+loader = DirectoryLoader("./documents", glob="**/*.txt", show_progress=True)
+docs = loader.load()
+# Split documents into overlapping chunks (chunk_size=500, chunk_overlap=150;
+# presumably measured in characters - confirm against the splitter's length function).
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)
+texts = text_splitter.split_documents(docs)
+# Embedding model shared by the corpus chunks here and by user questions in respond().
+embedding_model = Embedding(model_name="BAAI/bge-base-en", max_length=512)
+# One embedding per chunk, in the same order as `texts`; list() materialises the
+# result of embed() so it can be reused for every query.
+embeddings = list(embedding_model.embed([text.page_content for text in texts]))
+# Chat UI: a chatbot pane, a text box for the question and a button that clears both.
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    clear = gr.ClearButton([msg, chatbot])
+    def respond(message, chat_history):
+        """Answer ``message`` from the three best-matching chunks and extend the chat.
+        Args:
+            message: The user's question from the text box.
+            chat_history: List of (user, bot) tuples shown in the chatbot; appended to in place.
+        Returns:
+            A tuple ``("", chat_history)``: the empty string clears the text box.
+        """
+        message_embedding = list(embedding_model.embed([message]))[0]
+        result_docs = get_top_k(message_embedding, embeddings, texts, k=3)
+        sources = [doc.page_content for doc in result_docs]
+        # Render the prompt directly as a string. The previous code wrapped it in
+        # HumanMessage, which is never imported and raised NameError on every submit.
+        # The context is the chunk text itself rather than the repr of Document objects.
+        prompt_text = PROMPT.format(context="\n\n".join(sources), question=message)
+        print("Question: ", prompt_text)
+        output = query(
+            {
+                "inputs": prompt_text,
+                "parameters": {
+                    "temperature": 0.9,
+                    "top_p": 0.95,
+                    "repetition_penalty": 1.2,
+                    "top_k": 50,
+                    "truncate": 1000,
+                    "max_new_tokens": 1024,
+                },
+            }
+        )
+        print("Response: ", output, "\n")
+        # Accept both reply shapes handled here: a list whose first item holds the
+        # generation, or a bare dict such as {"error": "..."}. Indexing output[0]
+        # unconditionally raised KeyError on the dict form.
+        if isinstance(output, list) and output and isinstance(output[0], dict):
+            payload = output[0]
+        elif isinstance(output, dict):
+            payload = output
+        else:
+            payload = {}
+        generated_text = payload.get("generated_text")
+        if generated_text:
+            bot_message = f"""{generated_text}
+        Sources:
+        {sources}
+        """
+        else:
+            error = payload.get("error") or "unexpected response from the model endpoint"
+            bot_message = f"There was an error: {error}"
+        chat_history.append((message, bot_message))
+        return "", chat_history
+    # Submitting the text box runs respond() and refreshes both the box and the chat pane.
+    msg.submit(respond, [msg, chatbot], [msg, chatbot])
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,141 @@

+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.1.1
+annotated-types==0.5.0
+antlr4-python3-runtime==4.9.3
+anyio==3.7.1
+async-timeout==4.0.3
+attrs==23.1.0
+backoff==2.2.1
+bcrypt==4.0.1
+beautifulsoup4==4.12.2
+certifi==2023.7.22
+cffi==1.16.0
+chardet==5.2.0
+charset-normalizer==3.3.0
+chroma-hnswlib==0.7.3
+chromadb==0.4.13
+click==8.1.7
+coloredlogs==15.0.1
+contourpy==1.1.1
+cryptography==41.0.4
+ctransformers==0.2.27
+cycler==0.12.0
+dataclasses-json==0.6.1
+effdet==0.4.1
+emoji==2.8.0
+fastapi==0.103.2
+fastembed==0.1.1
+ffmpy==0.3.1
+filelock==3.12.4
+filetype==1.2.0
+flatbuffers==23.5.26
+fonttools==4.43.0
+frozenlist==1.4.0
+fsspec==2023.9.2
+gradio==3.45.2
+gradio_client==0.5.3
+h11==0.14.0
+httpcore==0.18.0
+httptools==0.6.0
+httpx==0.25.0
+huggingface-hub==0.16.4
+humanfriendly==10.0
+idna==3.4
+importlib-resources==6.1.0
+iopath==0.1.10
+Jinja2==3.1.2
+joblib==1.3.2
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.19.1
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+langchain==0.0.305
+langdetect==1.0.9
+langsmith==0.0.41
+layoutparser==0.3.4
+lxml==4.9.3
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+matplotlib==3.8.0
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.4
+mypy-extensions==1.0.0
+networkx==3.2
+nltk==3.8.1
+numexpr==2.8.7
+numpy==1.26.0
+omegaconf==2.3.0
+onnx==1.14.1
+onnxruntime==1.16.0
+openai==0.28.1
+opencv-python==4.8.1.78
+orjson==3.9.7
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.1
+pdf2image==1.16.3
+pdfminer.six==20221105
+pdfplumber==0.10.2
+Pillow==10.0.1
+portalocker==2.8.2
+posthog==3.0.2
+protobuf==4.24.3
+pulsar-client==3.3.0
+py-cpuinfo==9.0.0
+pycocotools==2.0.7
+pycparser==2.21
+pydantic==2.4.2
+pydantic_core==2.10.1
+pydub==0.25.1
+pyparsing==3.1.1
+pypdfium2==4.21.0
+PyPika==0.48.9
+pytesseract==0.3.10
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-iso639==2023.6.15
+python-magic==0.4.27
+python-multipart==0.0.6
+pytz==2023.3.post1
+PyYAML==6.0.1
+rapidfuzz==3.4.0
+referencing==0.30.2
+regex==2023.8.8
+requests==2.31.0
+rpds-py==0.10.3
+safetensors==0.4.0
+scipy==1.11.3
+semantic-version==2.10.0
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.21
+starlette==0.27.0
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.3
+tiktoken==0.5.1
+timm==0.9.7
+tokenizers==0.14.1
+toolz==0.12.0
+torch==2.1.0
+torchvision==0.16.0
+tqdm==4.66.1
+transformers==4.34.1
+typer==0.9.0
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+tzdata==2023.3
+unstructured==0.10.18
+unstructured-inference==0.5.31
+unstructured.pytesseract==0.3.12
+urllib3==2.0.5
+uvicorn==0.23.2
+uvloop==0.17.0
+watchfiles==0.20.0
+websockets==11.0.3
+yarl==1.9.2