kristada673 committed on
Commit
150092a
1 Parent(s): bc3b8cd

Delete Web Application

Browse files
Web Application/.env DELETED
@@ -1 +0,0 @@
1
- OPENAI_API_KEY="<REDACTED>"
 
 
Web Application/10K_Annual_Reports/Alphabet.pdf DELETED
The diff for this file is too large to render. See raw diff
 
Web Application/10K_Annual_Reports/Amazon.pdf DELETED
The diff for this file is too large to render. See raw diff
 
Web Application/10K_Annual_Reports/Apple.pdf DELETED
The diff for this file is too large to render. See raw diff
 
Web Application/10K_Annual_Reports/Meta.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a2bebf058c6e947c09f9fdb510010a92f6698b458941956ad0bbdaa043ae6de
3
- size 1111637
 
 
 
 
Web Application/10K_Annual_Reports/Microsoft.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:50507a219c93a452c1a15e1c5bb5d01d53a97d75c1ce91ea0a9703ef7debca95
3
- size 1547825
 
 
 
 
Web Application/10K_Annual_Reports/Netflix.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d95d9d4a03473863582a234e8edfd97eac97f1be9e552f9467e95dd8ce61280e
3
- size 1410523
 
 
 
 
Web Application/10K_Annual_Reports/Tesla.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a2bfbae724f9f4a7b28539993ca79c54db6ead3ff5105693a546e5a2134bbde
3
- size 2659773
 
 
 
 
Web Application/Dockerfile DELETED
@@ -1,22 +0,0 @@
1
- FROM python:3.9
2
-
3
- RUN pip install virtualenv && virtualenv venv -p python3
4
- ENV VIRTUAL_ENV=/venv
5
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
6
-
7
- WORKDIR /app
8
- COPY requirements.txt ./
9
- RUN pip install -r requirements.txt
10
-
11
- RUN git clone https://github.com/facebookresearch/detectron2.git
12
- RUN python -m pip install -e detectron2
13
-
14
- # Install dependencies
15
- RUN apt-get update && apt-get install libgl1 -y
16
- RUN pip install -U nltk
17
- RUN [ "python3", "-c", "import nltk; nltk.download('punkt', download_dir='/usr/local/nltk_data')" ]
18
-
19
- COPY . /app
20
-
21
- # Run the application:
22
- CMD ["python", "-u", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Web Application/Pipfile DELETED
@@ -1,21 +0,0 @@
1
- [[source]]
2
- url = "https://pypi.org/simple"
3
- verify_ssl = true
4
- name = "pypi"
5
-
6
- [packages]
7
- langchain = "*"
8
- openai = "*"
9
- pybind11 = "*"
10
- chromadb = "*"
11
- cython = "*"
12
- unstructured = {extras = ["local-inference"], version = "*"}
13
- layoutparser = {extras = ["layoutmodels", "tesseract"], version = "*"}
14
- pytesseract = "*"
15
- pillow = "==9.0.0"
16
- tiktoken = "*"
17
-
18
- [dev-packages]
19
-
20
- [requires]
21
- python_version = "3.11"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Web Application/Pipfile.lock DELETED
The diff for this file is too large to render. See raw diff
 
Web Application/VectorStoreIndex.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:21120b3b81f1396478c8c377dcebe5a686ee501de7b461a3bf198f8da0eef09c
3
- size 106261438
 
 
 
 
Web Application/VectorStoreIndex/chroma-collections.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:250cf833cc54545b03d2454a5ff23eda3e047f8a3c465d29243f2e697b095848
3
- size 557
 
 
 
 
Web Application/VectorStoreIndex/chroma-embeddings.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:32c73aa28836865bbc6964cb3f8f0a540b9639828e39b6fa3c4ae0cb7fc7a1a3
3
- size 114611418
 
 
 
 
Web Application/VectorStoreIndex/index/id_to_uuid_4687da76-fa8c-47cd-96a2-c9f3fc08313a.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c839634ab7858bf13401325e4055d5a3df0dcd5984705ecd5d83a79966363e0e
3
- size 150307
 
 
 
 
Web Application/VectorStoreIndex/index/index_4687da76-fa8c-47cd-96a2-c9f3fc08313a.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4133367b79c8a0bed4a21a4885f7d35008f9bc69c9fd0b513eafcfb59faddb0b
3
- size 29136520
 
 
 
 
Web Application/VectorStoreIndex/index/index_metadata_4687da76-fa8c-47cd-96a2-c9f3fc08313a.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4be5c2f38188c24bc82c3ae21db9bcbf876838e71e8b95c31435a90f960c26f2
3
- size 74
 
 
 
 
Web Application/VectorStoreIndex/index/uuid_to_id_4687da76-fa8c-47cd-96a2-c9f3fc08313a.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecdc7842716c5b96c8438096e1d1f5a276da5742cf13ea2101f83de45c0f5456
3
- size 175727
 
 
 
 
Web Application/__pycache__/vectorstore.cpython-310.pyc DELETED
Binary file (4.1 kB)
 
Web Application/app.py DELETED
@@ -1,46 +0,0 @@
1
- import os, gradio
2
- from langchain.document_loaders import UnstructuredPDFLoader
3
- from langchain.indexes import VectorstoreIndexCreator
4
- from vectorstore import VectorstoreIndexCreator
5
-
6
- os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
7
-
8
- text_folder = '10K_Annual_Reports'
9
- loaders = [UnstructuredPDFLoader(os.path.join(text_folder, fn)) for fn in os.listdir(text_folder)]
10
-
11
- # Create the index, if it does not exist, and save it
12
- if not os.path.isfile('VectorStoreIndex/chroma-embeddings.parquet'):
13
- from langchain.vectorstores import Chroma
14
- index = VectorstoreIndexCreator(vectorstore_cls=Chroma, vectorstore_kwargs={ "persist_directory": "VectorStoreIndex/"}).from_loaders(loaders)
15
- index.vectorstore.persist()
16
-
17
- # Load the saved index
18
- index_saved = VectorstoreIndexCreator().from_persistent_index("VectorStoreIndex/")
19
-
20
- description = """This is an AI conversational agent where you provide it with the annual reports of companies, and it can study it and answer any questions
21
- you have about it. Currently, the LLM has been trained on the following companies' 10-K reports: Amazon, Apple, Alphabet (Google), Meta (Facebook), Microsoft,
22
- Netflix and Tesla. I plan to include more companies' 10-K reports in the future.
23
-
24
- Once the LLM is trained on a new 10-K report, it stores the vector embeddings of the document locally using ChromaDB to make the querying faster and also to
25
- save time and money on creating the vector embeddings for the same document in future.
26
-
27
- The LLM's universe is only the 10-K reports it has been trained on; it cannot pull information from the internet. So, you can ask it about anything that's
28
- contained in their 10-K reports. If it cannot find an answer to your query within the 10-K reports, it will reply with "I don't know". Some examples of questions
29
- you can ask are:
30
-
31
- - What are the risks for Tesla?
32
- - What were Google's earnings for the last fiscal year?
33
- - Who are the competitors of Apple?
34
-
35
- An example of querying about something the LLM's training did not include:
36
-
37
- - Query: "What is Tesco?"
38
- - Response: " Tesco is not mentioned in the context, so I don't know."
39
- """
40
-
41
- def chat_response(query):
42
- return index_saved.query(query)
43
-
44
- interface = gradio.Interface(fn=chat_response, inputs="text", outputs="text", title='Annual Reports GPT', description=description)
45
-
46
- interface.launch() #server_name="0.0.0.0", server_port=8080, share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Web Application/requirements.txt DELETED
@@ -1,7 +0,0 @@
1
- gradio
2
- langchain
3
- unstructured
4
- openai
5
- chromadb
6
- unstructured
7
- tiktoken
 
 
 
 
 
 
 
 
Web Application/vectorstore.py DELETED
@@ -1,86 +0,0 @@
1
- from typing import Any, List, Optional, Type
2
-
3
- from pydantic import BaseModel, Extra, Field
4
-
5
- from langchain.base_language import BaseLanguageModel
6
- from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
7
- from langchain.chains.retrieval_qa.base import RetrievalQA
8
- from langchain.document_loaders.base import BaseLoader
9
- from langchain.embeddings.base import Embeddings
10
- from langchain.embeddings.openai import OpenAIEmbeddings
11
- from langchain.llms.openai import OpenAI
12
- from langchain.schema import Document
13
- from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
14
- from langchain.vectorstores.base import VectorStore
15
- from langchain.vectorstores.chroma import Chroma
16
-
17
-
18
- def _get_default_text_splitter() -> TextSplitter:
19
- return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
20
-
21
-
22
- class VectorStoreIndexWrapper(BaseModel):
23
- """Wrapper around a vectorstore for easy access."""
24
-
25
- vectorstore: VectorStore
26
-
27
- class Config:
28
- """Configuration for this pydantic object."""
29
-
30
- extra = Extra.forbid
31
- arbitrary_types_allowed = True
32
-
33
- def query(
34
- self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any
35
- ) -> str:
36
- """Query the vectorstore."""
37
- llm = llm or OpenAI(temperature=0)
38
- chain = RetrievalQA.from_chain_type(
39
- llm, retriever=self.vectorstore.as_retriever(), **kwargs
40
- )
41
- return chain.run(question)
42
-
43
- def query_with_sources(
44
- self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any
45
- ) -> dict:
46
- """Query the vectorstore and get back sources."""
47
- llm = llm or OpenAI(temperature=0)
48
- chain = RetrievalQAWithSourcesChain.from_chain_type(
49
- llm, retriever=self.vectorstore.as_retriever(), **kwargs
50
- )
51
- return chain({chain.question_key: question})
52
-
53
-
54
- class VectorstoreIndexCreator(BaseModel):
55
- """Logic for creating indexes."""
56
-
57
- vectorstore_cls: Type[VectorStore] = Chroma
58
- embedding: Embeddings = Field(default_factory=OpenAIEmbeddings)
59
- text_splitter: TextSplitter = Field(default_factory=_get_default_text_splitter)
60
- vectorstore_kwargs: dict = Field(default_factory=dict)
61
-
62
- class Config:
63
- """Configuration for this pydantic object."""
64
-
65
- extra = Extra.forbid
66
- arbitrary_types_allowed = True
67
-
68
- def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
69
- """Create a vectorstore index from loaders."""
70
- docs = []
71
- for loader in loaders:
72
- docs.extend(loader.load())
73
- return self.from_documents(docs)
74
-
75
- def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
76
- """Create a vectorstore index from documents."""
77
- sub_docs = self.text_splitter.split_documents(documents)
78
- vectorstore = self.vectorstore_cls.from_documents(
79
- sub_docs, self.embedding, **self.vectorstore_kwargs
80
- )
81
- return VectorStoreIndexWrapper(vectorstore=vectorstore)
82
-
83
- def from_persistent_index(self, path: str) -> VectorStoreIndexWrapper:
84
- """Load a vectorstore index from a persistent index."""
85
- vectorstore = self.vectorstore_cls(persist_directory=path, embedding_function=self.embedding)
86
- return VectorStoreIndexWrapper(vectorstore=vectorstore)