Spaces:

DeepVen
/

rag-test-venkat

Paused

App Files Files Community

DeepVen committed on Oct 4, 2023

Commit

ed62434

1 Parent(s): 5aef1e5

Upload 8 files

Browse files

switch to langchain

Files changed (2) hide show

Index.py +85 -211
requirements.txt +4 -6

Index.py CHANGED Viewed

@@ -1,63 +1,80 @@
 from fastapi import FastAPI
-# from transformers import pipeline
-from txtai.embeddings import Embeddings
-from txtai.pipeline import Extractor
 from langchain.document_loaders import WebBaseLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain import HuggingFaceHub
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from txtai.embeddings import Embeddings
-from txtai.pipeline import Extractor
-import pandas as pd
-import sqlite3
-import os
 # NOTE - we configure docs_url to serve the interactive Docs at the root path
 # of the app. This way, we can use the docs as a landing page for the app on Spaces.
 app = FastAPI(docs_url="/")
-# app = FastAPI()
-# pipe = pipeline("text2text-generation", model="google/flan-t5-small")
-# @app.get("/generate")
-# def generate(text: str):
-#     """
-#     Using the text2text-generation pipeline from `transformers`, generate text
-#     from the given input text. The model used is `google/flan-t5-small`, which
-#     can be found [here](https://huggingface.co/google/flan-t5-small).
-#     """
-#     output = pipe(text)
-#     return {"output": output[0]["generated_text"]}
-def load_embeddings(
-    domain: str = "",
-    db_present: bool = True,
-    path: str = "sentence-transformers/all-MiniLM-L6-v2",
-    index_name: str = "index",
-):
-    # Create embeddings model with content support
-    embeddings = Embeddings({"path": path, "content": True})
-    # if Vector DB is not present
-    if not db_present:
-        return embeddings
-    else:
-        if domain == "":
-            embeddings.load(index_name)  # change this later
-        else:
-            print(3)
-            embeddings.load(f"{index_name}/{domain}")
-        return embeddings
-def _check_if_db_exists(db_path: str) -> bool:
-    return os.path.exists(db_path)
 def _text_splitter(doc):
@@ -68,189 +85,46 @@ def _text_splitter(doc):
     )
     return text_splitter.transform_documents(doc)
 def _load_docs(path: str):
     load_doc = WebBaseLoader(path).load()
     doc = _text_splitter(load_doc)
     return doc
-def _stream(dataset, limit, index: int = 0):
-    for row in dataset:
-        yield (index, row.page_content, None)
-        index += 1
-        if index >= limit:
-            break
-def _max_index_id(path):
-    db = sqlite3.connect(path)
-    table = "sections"
-    df = pd.read_sql_query(f"select * from {table}", db)
-    return {"max_index": df["indexid"].max()}
-def _upsert_docs(doc, embeddings, vector_doc_path: str, db_present: bool):
-    print(vector_doc_path)
-    if db_present:
-        print(1)
-        max_index = _max_index_id(f"{vector_doc_path}/documents")
-        print(max_index)
-        embeddings.upsert(_stream(doc, 500, max_index["max_index"]))
-        print("Embeddings done!!")
-        embeddings.save(vector_doc_path)
-        print("Embeddings done - 1!!")
-    else:
-        print(2)
-        embeddings.index(_stream(doc, 500, 0))
-        embeddings.save(vector_doc_path)
-        max_index = _max_index_id(f"{vector_doc_path}/documents")
-        print(max_index)
-    # check
-    # max_index = _max_index_id(f"{vector_doc_path}/documents")
-    # print(max_index)
-    return max_index
-# def prompt(question):
-#     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
-#             Question: {question}
-#             Context: """
-# def search(query, question=None):
-#     # Default question to query if empty
-#     if not question:
-#         question = query
-#     return extractor([("answer", query, prompt(question), False)])[0][1]
-# @app.get("/rag")
-# def rag(question: str):
-#     # question = "what is the document about?"
-#     answer = search(question)
-#     # print(question, answer)
-#     return {answer}
-# @app.get("/index")
-# def get_url_file_path(url_path: str):
-#     embeddings = load_embeddings()
-#     doc = _load_docs(url_path)
-#     embeddings, max_index = _upsert_docs(doc, embeddings)
-#     return max_index
-@app.get("/index/{domain}/")
-def get_domain_file_path(domain: str, file_path: str):
-    print(domain, file_path)
-    print(os.getcwd())
-    bool_value = _check_if_db_exists(db_path=f"{os.getcwd()}/index/{domain}/documents")
-    print(bool_value)
-    if bool_value:
-        embeddings = load_embeddings(domain=domain, db_present=bool_value)
-        print(embeddings)
-        doc = _load_docs(file_path)
-        max_index = _upsert_docs(
-            doc=doc,
-            embeddings=embeddings,
-            vector_doc_path=f"{os.getcwd()}/index/{domain}",
-            db_present=bool_value,
-        )
-        # print("-------")
-    else:
-        embeddings = load_embeddings(domain=domain, db_present=bool_value)
-        doc = _load_docs(file_path)
-        max_index = _upsert_docs(
-            doc=doc,
-            embeddings=embeddings,
-            vector_doc_path=f"{os.getcwd()}/index/{domain}",
-            db_present=bool_value,
-        )
-    # print("Final - output : ", max_index)
-    return "Executed Successfully!!"
-def _check_if_db_exists(db_path: str) -> bool:
-    return os.path.exists(db_path)
-def _load_embeddings_from_db(
-    db_present: bool,
-    domain: str,
-    #path: str = "sentence-transformers/all-MiniLM-L6-v2",
-    path: str = "sentence-transformers/nli-mpnet-base-v2",
-):
-    # Create embeddings model with content support
-    embeddings = Embeddings({"path": path, "content": True})
-    # if Vector DB is not present
-    if not db_present:
-        print("db not present")
-        return embeddings
-    else:
-        if domain == "":
-            print("domain empty")
-            embeddings.load("index")  # change this later
-        else:
-            print(3)
-            embeddings.load(f"{os.getcwd()}/index/{domain}")
-        return embeddings
 def _prompt(question):
-    return f"""Answer the following question using only the context below. Say 'Could not find answer within the context' when the question can't be answered.
             Question: {question}
             Context: """
-def _search(query, extractor, question=None):
-    # Default question to query if empty
-    if not question:
-        question = query
-    # template = f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
-    #         Question: {question}
-    #         Context: """
-    # prompt = PromptTemplate(template=template, input_variables=["question"])
-    # llm_chain = LLMChain(prompt=prompt, llm=extractor)
-    # return {"question": question, "answer": llm_chain.run(question)}
-    print(extractor([("answer", query, _prompt(question), True)]))
-    return extractor([("answer", query, _prompt(question), False)])[0][1]
-@app.get("/rag")
-def rag(domain: str, question: str):
-    print()
-    db_exists = _check_if_db_exists(db_path=f"{os.getcwd()}/index/{domain}/documents")
-    print(db_exists)
-    bool_value = _check_if_db_exists(db_path=f"{os.getcwd()}/index/{domain}/documents")
-    print(bool_value)
-    # if db_exists:
-    embeddings = _load_embeddings_from_db(db_exists, domain)
-    # Create extractor instance
-    #extractor = Extractor(embeddings, "google/flan-t5-base")
-    #extractor = Extractor(embeddings, "TheBloke/Llama-2-7B-GGUF")
-    print("before calling extractor")
-    #extractor = Extractor(embeddings, "distilbert-base-cased-distilled-squad")
-    extractor = Extractor(embeddings, "google/flan-t5-base")
-    # llm = HuggingFaceHub(
-    #     repo_id="google/flan-t5-xxl",
-    #     model_kwargs={"temperature": 1, "max_length": 1000000},
-    # )
-    # else:
-    print("before doing Q&A")
-    answer = _search(question, extractor)
-    text = _prompt(question)
-    text += "\n" + " ".join(x["text"] for x in embeddings.search(question))
-    print("context \n")
-    print(text)
-    return {"question": question, "answer": answer, "context": text}

 from fastapi import FastAPI
+import os
+import phoenix as px
+from phoenix.trace.langchain import OpenInferenceTracer, LangChainInstrumentor
+from langchain.embeddings import HuggingFaceEmbeddings  # for using HuggingFace models
+from langchain.chains.question_answering import load_qa_chain
+from langchain import HuggingFaceHub
+from langchain.chains import RetrievalQA
+from langchain.callbacks import StdOutCallbackHandler
+#from langchain.retrievers import KNNRetriever
+from langchain.storage import LocalFileStore
+from langchain.embeddings import CacheBackedEmbeddings
+from langchain.vectorstores import FAISS
 from langchain.document_loaders import WebBaseLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+# from langchain import HuggingFaceHub
+# from langchain.prompts import PromptTemplate
+# from langchain.chains import LLMChain
+# from txtai.embeddings import Embeddings
+# from txtai.pipeline import Extractor
+# import pandas as pd
+# import sqlite3
+# import os
 # NOTE - we configure docs_url to serve the interactive Docs at the root path
 # of the app. This way, we can use the docs as a landing page for the app on Spaces.
 app = FastAPI(docs_url="/")
+# Phoenix setup: launch the local Phoenix app and send LangChain traces to it.
+session = px.launch_app()
+# If no exporter is specified, the tracer will export to the locally running Phoenix server
+tracer = OpenInferenceTracer()
+# If no tracer is specified, a tracer is constructed for you
+LangChainInstrumentor(tracer).instrument()
+print(session.url)
+# SECURITY: never commit an API token to source control. The token is read
+# from the environment (e.g. a Space secret named HUGGINGFACEHUB_API_TOKEN);
+# any token that was previously committed here must be revoked.
+if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
+    raise RuntimeError(
+        "HUGGINGFACEHUB_API_TOKEN is not set; configure it as an environment "
+        "variable / Space secret before starting the app."
+    )
+# embedding cache
+store = LocalFileStore("./cache/")
+# define embedder (embeddings are cached on disk through `store`)
+core_embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+embedder = CacheBackedEmbeddings.from_bytes_store(core_embeddings_model, store)
+# define llm
+llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 1, "max_length": 1000000})
+#llm=HuggingFaceHub(repo_id="gpt2", model_kwargs={"temperature":1, "max_length":1000000})
+handler = StdOutCallbackHandler()
+# Module-level state shared by the endpoints. A bare name expression such as
+# `vectorstore` raises NameError at import time, so bind explicit placeholders;
+# initialize_vectorstore() is expected to replace them.
+vectorstore = None
+retriever = None
+def initialize_vectorstore():
+    """Seed the module-level FAISS vector store and retriever with a sample page.
+
+    Loads and splits one hard-coded case-study URL, embeds the chunks with the
+    module-level `embedder`, and publishes the resulting store and its
+    retriever through the module globals `vectorstore` and `retriever`.
+    """
+    # Without this declaration the assignments below only create locals and
+    # the endpoints never see the initialized store/retriever.
+    global vectorstore, retriever
+    # _load_docs loads the page and splits it via _text_splitter; the original
+    # referenced a `text_splitter` name that is not defined at module scope in
+    # the visible code.
+    webpage_chunks = _load_docs("https://www.tredence.com/case-studies/tredence-helped-a-global-retailer-providing-holistic-campaign-analytics-by-using-the-power-of-gcp")
+    # store embeddings in vector store
+    vectorstore = FAISS.from_documents(webpage_chunks, embedder)
+    print("vector store initialized with sample doc")
+    # instantiate a retriever
+    retriever = vectorstore.as_retriever()
 def _text_splitter(doc):
     )
     return text_splitter.transform_documents(doc)
 def _load_docs(path: str):
+    """Load the web page at `path` with WebBaseLoader and return it after splitting with `_text_splitter`."""
     load_doc = WebBaseLoader(path).load()
     doc = _text_splitter(load_doc)
     return doc
+@app.get("/index/")
+def get_domain_file_path(file_path: str):
+    """Load the web page at `file_path`, split it, and add the chunks to the vector store.
+
+    Returns a fixed confirmation string once the chunks have been added to the
+    module-level `vectorstore`.
+    """
+    print(file_path)
+    # _load_docs already loads AND splits the page, so its result is the list
+    # of chunks; running _text_splitter on it again was redundant work.
+    webpage_chunks = _load_docs(file_path)
+    # store embeddings in vector store
+    vectorstore.add_documents(webpage_chunks)
+    return "document loaded to vector store successfully!!"
 def _prompt(question):
+    # Wrap `question` in a fixed instruction template; the text ends with an
+    # empty "Context: " label (nothing is appended to it in this function).
+    return f"""Answer following question using only the context below. Say 'Could not find answer with provided context' when question can't be answered.
             Question: {question}
             Context: """
+@app.get("/rag")
+def rag(question: str):
+    """Answer `question` with a RetrievalQA chain over the module-level retriever.
+
+    Returns a dict with the original question and the chain's `result` text.
+    Raises an HTTP 503 if the retriever has not been initialized yet.
+    """
+    # Local import: fastapi is already a dependency of this file.
+    from fastapi import HTTPException
+
+    # Fail with a clear message instead of an opaque 500 when the vector store
+    # has not been built (retriever still unset).
+    if retriever is None:
+        raise HTTPException(status_code=503, detail="Vector store is not initialized yet.")
+    chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=retriever,
+        callbacks=[handler],
+        return_source_documents=True,
+    )
+    # NOTE(review): the full instruction text from _prompt() is passed as the
+    # chain query, so it is presumably also what the retriever searches with;
+    # confirm whether the raw question should be used instead.
+    #response = chain("how tredence brought good insight?")
+    response = chain(_prompt(question))
+    return {"question": question, "answer": response['result']}
+# Runs once at import time, after all helpers above are defined, to build the
+# vector store from the sample document.
+initialize_vectorstore()

requirements.txt CHANGED Viewed

@@ -4,10 +4,8 @@ uvicorn[standard]==0.17.*
 sentencepiece==0.1.*
 torch==1.12.*
 transformers==4.*
-txtai==6.0.*
 langchain==0.0.301
-langsmith==0.0.40
-bs4==0.0.1
-pandas==2.1.1
-SQLAlchemy==2.0.21
-llama-cpp-python

 sentencepiece==0.1.*
 torch==1.12.*
 transformers==4.*
 langchain==0.0.301
+arize-phoenix
+huggingface_hub
+sentence-transformers
+faiss-cpu