Spaces:

kristada673
/

WebDemoLLM

Runtime error

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4a2bebf058c6e947c09f9fdb510010a92f6698b458941956ad0bbdaa043ae6de
-size 1111637

Web Application/10K_Annual_Reports/Microsoft.pdf DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:50507a219c93a452c1a15e1c5bb5d01d53a97d75c1ce91ea0a9703ef7debca95
-size 1547825

Web Application/10K_Annual_Reports/Netflix.pdf DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d95d9d4a03473863582a234e8edfd97eac97f1be9e552f9467e95dd8ce61280e
-size 1410523

Web Application/10K_Annual_Reports/Tesla.pdf DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3a2bfbae724f9f4a7b28539993ca79c54db6ead3ff5105693a546e5a2134bbde
-size 2659773

Web Application/Dockerfile DELETED Viewed

@@ -1,22 +0,0 @@
-FROM python:3.9
-# Install OS-level dependencies first so this layer caches independently of the
-# Python packages; drop the apt lists afterwards to keep the layer small.
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends libgl1 \
-    && rm -rf /var/lib/apt/lists/*
-# Create an isolated virtualenv at /venv and put it first on PATH so every
-# later `pip` / `python` invocation resolves to it.
-RUN pip install --no-cache-dir virtualenv && virtualenv /venv -p python3
-ENV VIRTUAL_ENV=/venv
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-# The punkt data below is downloaded to /usr/local/nltk_data; point NLTK at that
-# directory explicitly instead of relying on the interpreter's default search path.
-ENV NLTK_DATA=/usr/local/nltk_data
-# Gradio listens on loopback unless told otherwise, which is unreachable from
-# outside the container.
-ENV GRADIO_SERVER_NAME=0.0.0.0
-WORKDIR /app
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
-RUN git clone https://github.com/facebookresearch/detectron2.git
-RUN python -m pip install --no-cache-dir -e detectron2
-RUN pip install --no-cache-dir -U nltk
-RUN [ "python3", "-c", "import nltk; nltk.download('punkt', download_dir='/usr/local/nltk_data')" ]
-COPY . /app
-# Gradio's default port.
-EXPOSE 7860
-# Run the application:
-CMD ["python", "-u", "app.py"]

Web Application/Pipfile DELETED Viewed

@@ -1,21 +0,0 @@
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-[packages]
-langchain = "*"
-openai = "*"
-pybind11 = "*"
-chromadb = "*"
-cython = "*"
-unstructured = {extras = ["local-inference"], version = "*"}
-layoutparser = {extras = ["layoutmodels", "tesseract"], version = "*"}
-pytesseract = "*"
-pillow = "==9.0.0"
-tiktoken = "*"
-[dev-packages]
-[requires]
-python_version = "3.11"

Web Application/Pipfile.lock DELETED Viewed

The diff for this file is too large to render. See raw diff

Web Application/VectorStoreIndex.zip DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:21120b3b81f1396478c8c377dcebe5a686ee501de7b461a3bf198f8da0eef09c
-size 106261438

Web Application/VectorStoreIndex/chroma-collections.parquet DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:250cf833cc54545b03d2454a5ff23eda3e047f8a3c465d29243f2e697b095848
-size 557

Web Application/VectorStoreIndex/chroma-embeddings.parquet DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:32c73aa28836865bbc6964cb3f8f0a540b9639828e39b6fa3c4ae0cb7fc7a1a3
-size 114611418

Web Application/VectorStoreIndex/index/id_to_uuid_4687da76-fa8c-47cd-96a2-c9f3fc08313a.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c839634ab7858bf13401325e4055d5a3df0dcd5984705ecd5d83a79966363e0e
-size 150307

Web Application/VectorStoreIndex/index/index_4687da76-fa8c-47cd-96a2-c9f3fc08313a.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4133367b79c8a0bed4a21a4885f7d35008f9bc69c9fd0b513eafcfb59faddb0b
-size 29136520

Web Application/VectorStoreIndex/index/index_metadata_4687da76-fa8c-47cd-96a2-c9f3fc08313a.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4be5c2f38188c24bc82c3ae21db9bcbf876838e71e8b95c31435a90f960c26f2
-size 74

Web Application/VectorStoreIndex/index/uuid_to_id_4687da76-fa8c-47cd-96a2-c9f3fc08313a.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ecdc7842716c5b96c8438096e1d1f5a276da5742cf13ea2101f83de45c0f5456
-size 175727

Web Application/__pycache__/vectorstore.cpython-310.pyc DELETED Viewed

Binary file (4.1 kB)

Web Application/app.py DELETED Viewed

@@ -1,46 +0,0 @@
-import os, gradio
-from langchain.document_loaders import UnstructuredPDFLoader
-from langchain.indexes import VectorstoreIndexCreator
-from vectorstore import VectorstoreIndexCreator
-# The OpenAI client reads OPENAI_API_KEY from the environment. Fail fast with a
-# clear message when it is missing: assigning os.getenv(...) back into os.environ
-# raises an opaque TypeError for an unset variable and is a no-op otherwise.
-if not os.getenv("OPENAI_API_KEY"):
-  raise RuntimeError("The OPENAI_API_KEY environment variable must be set.")
-text_folder = '10K_Annual_Reports'
-# Hand only PDF files to the PDF loader; sorting makes the load order deterministic.
-pdf_names = sorted(fn for fn in os.listdir(text_folder) if fn.lower().endswith('.pdf'))
-loaders = [UnstructuredPDFLoader(os.path.join(text_folder, fn)) for fn in pdf_names]
-# Create the index, if it does not exist, and save it
-if not os.path.isfile('VectorStoreIndex/chroma-embeddings.parquet'):
-  from langchain.vectorstores import Chroma
-  index = VectorstoreIndexCreator(vectorstore_cls=Chroma, vectorstore_kwargs={ "persist_directory": "VectorStoreIndex/"}).from_loaders(loaders)
-  index.vectorstore.persist()
-# Load the saved index
-index_saved = VectorstoreIndexCreator().from_persistent_index("VectorStoreIndex/")
-# User-facing blurb passed to the Gradio interface as its description.
-description = """This is an AI conversational agent where you provide it with the annual reports of companies, and it can study it and answer any questions
-you have about it. Currently, the LLM has been trained on the following companies' 10-K reports: Amazon, Apple, Alphabet (Google), Meta (Facebook), Microsoft,
-Netflix and Tesla. I plan to include more companies' 10-K reports in future.
-Once the LLM is trained on a new 10-K report, it stores the vector embeddings of the document locally using ChromaDB to make the querying faster and also to
-save time and money on creating the vector embeddings for the same document in future.
-The LLM's universe is only the 10-K reports it has been trained on; it cannot pull information from the internet. So, you can ask it about anything that's
-contained in their 10-K reports. If it cannot find an answer to your query within the 10-K reports, it will reply with "I don't know". Some examples of questions
-you can ask are:
-    - What are the risks for Tesla?
-    - What was Google's earnings for the last fiscal year?
-    - Who are the competitors of Apple?
-An example of querying about something the LLM's training did not include:
-    - Query:    "What is Tesco?"
-    - Response: " Tesco is not mentioned in the context, so I don't know."
-"""
-def chat_response(query):
-  """Answer a user question by querying the saved vectorstore index.
-  Blank or missing input is answered locally with a short hint instead of
-  being forwarded to the index (and, through it, to the language model).
-  """
-  if not query or not query.strip():
-    return "Please enter a question."
-  return index_saved.query(query)
-# Single text-in / text-out Gradio UI wired to chat_response.
-interface = gradio.Interface(
-  fn=chat_response,
-  inputs="text",
-  outputs="text",
-  title='Annual Reports GPT',
-  description=description,
-)
-# Launched with Gradio's defaults; server_name, server_port and share can be passed here to override them.
-interface.launch()

Web Application/requirements.txt DELETED Viewed

@@ -1,7 +0,0 @@
-gradio
-langchain
-unstructured
-openai
-chromadb
-unstructured
-tiktoken

Web Application/vectorstore.py DELETED Viewed

@@ -1,86 +0,0 @@
-from typing import Any, List, Optional, Type
-from pydantic import BaseModel, Extra, Field
-from langchain.base_language import BaseLanguageModel
-from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
-from langchain.chains.retrieval_qa.base import RetrievalQA
-from langchain.document_loaders.base import BaseLoader
-from langchain.embeddings.base import Embeddings
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms.openai import OpenAI
-from langchain.schema import Document
-from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
-from langchain.vectorstores.base import VectorStore
-from langchain.vectorstores.chroma import Chroma
-def _get_default_text_splitter() -> TextSplitter:
-    """Build the splitter used when the caller does not supply one (chunk_size=1000, chunk_overlap=0)."""
-    default_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
-    return default_splitter
-class VectorStoreIndexWrapper(BaseModel):
-    """Pydantic model exposing question-answering helpers over a vectorstore."""
-    # The vectorstore that both query methods retrieve from.
-    vectorstore: VectorStore
-    class Config:
-        """Pydantic settings: reject unknown fields, permit arbitrary field types."""
-        arbitrary_types_allowed = True
-        extra = Extra.forbid
-    def query(
-        self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any
-    ) -> str:
-        """Answer ``question`` with a RetrievalQA chain over the wrapped vectorstore.
-        When ``llm`` is falsy an ``OpenAI(temperature=0)`` model is used; extra
-        keyword arguments are forwarded to ``RetrievalQA.from_chain_type``.
-        """
-        if not llm:
-            llm = OpenAI(temperature=0)
-        retriever = self.vectorstore.as_retriever()
-        qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever, **kwargs)
-        return qa_chain.run(question)
-    def query_with_sources(
-        self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any
-    ) -> dict:
-        """Answer ``question`` and return the chain's output dict, including sources.
-        When ``llm`` is falsy an ``OpenAI(temperature=0)`` model is used; extra
-        keyword arguments are forwarded to
-        ``RetrievalQAWithSourcesChain.from_chain_type``.
-        """
-        if not llm:
-            llm = OpenAI(temperature=0)
-        retriever = self.vectorstore.as_retriever()
-        sources_chain = RetrievalQAWithSourcesChain.from_chain_type(
-            llm, retriever=retriever, **kwargs
-        )
-        chain_inputs = {sources_chain.question_key: question}
-        return sources_chain(chain_inputs)
-class VectorstoreIndexCreator(BaseModel):
-    """Factory that builds, or reloads from disk, a ``VectorStoreIndexWrapper``."""
-    # Vectorstore implementation to instantiate; Chroma unless overridden.
-    vectorstore_cls: Type[VectorStore] = Chroma
-    # Embedding model; a new OpenAIEmbeddings is created per instance by default.
-    embedding: Embeddings = Field(default_factory=OpenAIEmbeddings)
-    # Splitter applied to documents before they are embedded.
-    text_splitter: TextSplitter = Field(default_factory=_get_default_text_splitter)
-    # Extra keyword arguments forwarded to ``vectorstore_cls.from_documents``.
-    vectorstore_kwargs: dict = Field(default_factory=dict)
-    class Config:
-        """Pydantic settings: reject unknown fields, permit arbitrary field types."""
-        arbitrary_types_allowed = True
-        extra = Extra.forbid
-    def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
-        """Load every document from ``loaders``, in order, and index them."""
-        loaded_docs = [doc for loader in loaders for doc in loader.load()]
-        return self.from_documents(loaded_docs)
-    def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
-        """Split ``documents`` into chunks, embed them and wrap the resulting vectorstore."""
-        chunks = self.text_splitter.split_documents(documents)
-        store = self.vectorstore_cls.from_documents(
-            chunks, self.embedding, **self.vectorstore_kwargs
-        )
-        return VectorStoreIndexWrapper(vectorstore=store)
-    def from_persistent_index(self, path: str) -> VectorStoreIndexWrapper:
-        """Open the vectorstore persisted under ``path`` and wrap it.
-        The store is constructed directly with ``persist_directory`` and
-        ``embedding_function``; ``vectorstore_kwargs`` is not used on this path.
-        """
-        store = self.vectorstore_cls(
-            embedding_function=self.embedding,
-            persist_directory=path,
-        )
-        return VectorStoreIndexWrapper(vectorstore=store)