Spaces: llamazookeeper (Build error)
Commit 016719e, committed by llamazookeeper
Parent(s): 59b9915
- app.py +22 -0
- pages/Tesla_Cases.py +144 -0
- pages/Tesla_Cases1.py +129 -0
- prompts/main.prompt +3 -0
- prompts/rag.prompt +13 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,22 @@
+import streamlit as st
+from llama_index import download_loader, SimpleDirectoryReader, StorageContext, load_index_from_storage
+from llama_index import SimpleDirectoryReader, ServiceContext, StorageContext, VectorStoreIndex, download_loader
+from llama_index.llms import HuggingFaceLLM
+from llama_index.embeddings import HuggingFaceEmbedding
+
+import torch
+torch.set_default_device('cuda')
+
+
+st.set_page_config(page_title="Tesla Cases", page_icon="", layout="wide")
+
+st.title("Tesla Cases \n\n **Tesla Cases Insights at Your Fingertips**")
+
+#st.balloons()
+
+st.success("""
+If you'd like to learn more about the technical details of Tesla cases, check out LlamaIndex:
+
+[How I built the Streamlit LLM application using LlamaIndex.]
+
+""")
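A note on the unconditional `torch.set_default_device('cuda')` call above: on CPU-only hardware it makes every subsequent tensor allocation fail, which is a plausible cause of this Space's "Build error" status. A minimal guarded sketch (my assumption, not part of the commit):

```python
import torch

# Only default to CUDA when a GPU is actually available, so the page
# still loads on CPU-only Spaces hardware.
if torch.cuda.is_available():
    torch.set_default_device('cuda')
```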
pages/Tesla_Cases.py
ADDED
@@ -0,0 +1,144 @@
+from langchain.prompts import PromptTemplate
+from langchain.output_parsers import PydanticOutputParser
+
+from llama_index import VectorStoreIndex, ServiceContext, StorageContext
+from llama_index.vector_stores import FaissVectorStore
+from llama_index.tools import QueryEngineTool, ToolMetadata
+from llama_index.query_engine import SubQuestionQueryEngine
+from llama_index.embeddings import OpenAIEmbedding
+from llama_index.schema import Document
+from llama_index.node_parser import UnstructuredElementNodeParser
+
+from src.utils import get_model, process_pdf2
+
+import streamlit as st
+import os
+import faiss
+import time
+from pypdf import PdfReader
+
+
+st.set_page_config(page_title="Yield Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
+
+st.title(":card_index_dividers: Yield Case Analyzer")
+st.info("""
+Begin by uploading the case report in PDF format, then click 'Process Document'. Once the document has been processed, you can enter a question and click Submit; the system will answer it.
+""")
+
+def process_pdf(pdf):
+    file = PdfReader(pdf)
+    print("in process pdf")
+    document_list = []
+    for page in file.pages:
+        document_list.append(Document(text=str(page.extract_text())))
+    print("in process pdf 1")
+
+    node_parser = UnstructuredElementNodeParser()
+    print("in process pdf 2")
+
+    nodes = node_parser.get_nodes_from_documents(document_list, show_progress=True)
+
+    return nodes
+
+
+def get_vector_index(nodes, vector_store):
+    print(nodes)
+    llm = get_model("openai")
+    if vector_store == "faiss":
+        d = 1536
+        faiss_index = faiss.IndexFlatL2(d)
+        vector_store = FaissVectorStore(faiss_index=faiss_index)
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+        # embed_model = OpenAIEmbedding()
+        # service_context = ServiceContext.from_defaults(embed_model=embed_model)
+        service_context = ServiceContext.from_defaults(llm=llm)
+        index = VectorStoreIndex(nodes,
+            service_context=service_context,
+            storage_context=storage_context
+        )
+    elif vector_store == "simple":
+        index = VectorStoreIndex(nodes)
+
+
+    return index
+
+
+
+def generate_insight(engine, search_string):
+
+    with open("prompts/report.prompt", "r") as f:
+        template = f.read()
+
+    prompt_template = PromptTemplate(
+        template=template,
+        input_variables=['search_string']
+    )
+
+    formatted_input = prompt_template.format(search_string=search_string)
+    print(formatted_input)
+    response = engine.query(formatted_input)
+    return response.response
+
+
+def get_query_engine(engine):
+    llm = get_model("openai")
+    service_context = ServiceContext.from_defaults(llm=llm)
+
+    query_engine_tools = [
+        QueryEngineTool(
+            query_engine=engine,
+            metadata=ToolMetadata(
+                name="Case Report",
+                description="Provides information about the cases from its case report.",
+            ),
+        ),
+    ]
+
+
+    s_engine = SubQuestionQueryEngine.from_defaults(
+        query_engine_tools=query_engine_tools,
+        service_context=service_context
+    )
+    return s_engine
+
+
+if "process_doc" not in st.session_state:
+    st.session_state.process_doc = False
+
+
+OPENAI_API_KEY = "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"
+os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
+
+
+if OPENAI_API_KEY:
+    pdfs = st.sidebar.file_uploader("Upload the case report in PDF format", type="pdf")
+    st.sidebar.info("""
+    Example pdf reports you can upload here:
+    """)
+
+    if st.sidebar.button("Process Document"):
+        with st.spinner("Processing Document..."):
+            nodes = process_pdf(pdfs)
+            #st.session_state.index = get_vector_index(nodes, vector_store="faiss")
+            st.session_state.index = get_vector_index(nodes, vector_store="simple")
+            st.session_state.process_doc = True
+            st.toast("Document Processed!")
+
+    #st.session_state.process_doc = True
+
+    if st.session_state.process_doc:
+        search_text = st.text_input("Enter your question")
+        if st.button("Submit"):
+            engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
+            start_time = time.time()
+
+            with st.status("**Analyzing Report...**"):
+                st.write("Case search result...")
+                response = generate_insight(engine, search_text)
+                st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
+
+            st.toast("Report Analysis Complete!")
+
+        if "end_time" in st.session_state:
+            st.write("Report Analysis Time: ", st.session_state.end_time, "s")
+
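This page imports `get_model` and `process_pdf2` from `src.utils`, and `generate_insight` reads `prompts/report.prompt`, but neither the `src/` package nor that prompt file is part of this commit, so the page cannot run as committed. A minimal hypothetical stand-in for `get_model` (the real helper may differ):

```python
# src/utils.py -- hypothetical stand-in; this module is NOT in the commit.
from llama_index.llms import OpenAI

def get_model(provider: str):
    """Return an LLM for the named provider (only "openai" is sketched here)."""
    if provider == "openai":
        return OpenAI(model="gpt-3.5-turbo", temperature=0)
    raise ValueError(f"Unknown provider: {provider}")
```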
pages/Tesla_Cases1.py
ADDED
@@ -0,0 +1,129 @@
+import os
+
+from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+from llama_index import download_loader, SimpleDirectoryReader, StorageContext, load_index_from_storage
+from llama_index.llms import HuggingFaceLLM
+from llama_index.embeddings import HuggingFaceEmbedding
+
+from IPython.display import Markdown, display
+
+import chromadb
+
+import streamlit as st
+import time
+from pypdf import PdfReader
+
+from pathlib import Path
+
+import os
+
+import torch
+#torch.set_default_device('cuda')
+
+
+st.set_page_config(page_title="Tesla Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
+
+st.title(":card_index_dividers: Tesla Case Analyzer")
+st.info("""
+Begin by uploading the case report in pptx format, then click 'Process Document'. Once the document has been processed, you can enter a question and click Submit; the system will answer it.
+""")
+
+if "process_doc" not in st.session_state:
+    st.session_state.process_doc = False
+
+
+
+llm = HuggingFaceLLM(
+    context_window=8000,
+    max_new_tokens=256,
+    generate_kwargs={"temperature": 0.1, "do_sample": True},
+    system_prompt=system_prompt,
+    query_wrapper_prompt=query_wrapper_prompt,
+    tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
+    model_name="mistralai/Mistral-7B-Instruct-v0.1",
+    device_map="auto",
+    tokenizer_kwargs={"max_length": 8000},
+    model_kwargs={"torch_dtype": torch.float16}
+)
+
+embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base")
+
+service_context = ServiceContext.from_defaults(
+    chunk_size=1024,
+    llm=llm,
+    embed_model=embed_model
+)
+
+
+files_uploaded = st.sidebar.file_uploader("Upload the case report in pptx format", type="pptx", accept_multiple_files=True)
+st.sidebar.info("""
+Example pptx reports you can upload here:
+""")
+
+if st.sidebar.button("Process Document"):
+    with st.spinner("Processing Document..."):
+
+        data_dir = './data'
+        if not os.path.exists(data_dir):
+            os.makedirs(data_dir)
+
+        for pdf in files_uploaded:
+            print(f'file named {pdf.name}')
+            fname = f'{data_dir}/{pdf.name}'
+            with open(fname, 'wb') as f:
+                f.write(pdf.read())
+
+
+        def fmetadata(dummy: str): return {"file_path": ""}
+
+        PptxReader = download_loader("PptxReader")
+        loader = SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader()}, file_metadata=fmetadata)
+
+        documents = loader.load_data()
+        for doc in documents:
+            doc.metadata["file_path"] = ""
+
+        print('stored')
+
+        st.session_state.process_doc = True
+
+        st.toast("Document Processed!")
+
+#st.session_state.process_doc = True
+
+OPENAI_API_KEY = "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"
+os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
+
+
+if OPENAI_API_KEY:
+    pdfs = st.sidebar.file_uploader("Upload the case report in PDF format", type="pdf")
+    st.sidebar.info("""
+    Example pdf reports you can upload here:
+    """)
+
+    if st.sidebar.button("Process Document"):
+        with st.spinner("Processing Document..."):
+            nodes = process_pdf(pdfs)
+            #st.session_state.index = get_vector_index(nodes, vector_store="faiss")
+            st.session_state.index = get_vector_index(nodes, vector_store="simple")
+            st.session_state.process_doc = True
+            st.toast("Document Processed!")
+
+    #st.session_state.process_doc = True
+
+    if st.session_state.process_doc:
+        search_text = st.text_input("Enter your question")
+        if st.button("Submit"):
+            engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
+            start_time = time.time()
+
+            with st.status("**Analyzing Report...**"):
+                st.write("Case search result...")
+                response = generate_insight(engine, search_text)
+                st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
+
+            st.toast("Report Analysis Complete!")
+
+        if "end_time" in st.session_state:
+            st.write("Report Analysis Time: ", st.session_state.end_time, "s")
+
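`Tesla_Cases1.py` passes `system_prompt` and `query_wrapper_prompt` to `HuggingFaceLLM` without ever defining them, which raises a `NameError` as soon as the page loads. A minimal sketch of plausible definitions (the wording and the Mistral `[INST]` wrapping are assumptions, not from the commit):

```python
# Assumed prompt definitions; not present anywhere in this commit.
system_prompt = (
    "You are a Tesla case analyst. Answer questions strictly from the "
    "uploaded case reports."
)
# Mistral-Instruct expects its instruction-wrapping tokens around the query.
query_wrapper_prompt = "<s>[INST] {query_str} [/INST]"
```

The page also calls `process_pdf`, `get_vector_index`, `get_query_engine`, and `generate_insight`, which are defined only in `pages/Tesla_Cases.py`; Streamlit pages run as separate scripts and do not share module scope, so those would need to be imported or duplicated here.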
prompts/main.prompt
ADDED
@@ -0,0 +1,3 @@
+You are a Tesla alert analyzer.
+Your job is to provide a detailed analysis of the following:
+
prompts/rag.prompt
ADDED
@@ -0,0 +1,13 @@
+You are a Tesla alert analyst tasked with providing an insightful analysis of a Tesla alerts report.
+
+Rules to be followed while generating the insight:
+- Desired Length: {desired_length}
+- Complexity Level: {complexity_level}
+- Format: {output_format}
+----------------------------------------------
+
+Generate Insight for:
+- Section Name: {section_name}
+- Insight to be generated: As a yield case analyst, {specific_topic}
+- Additional Details: {specific_elements}
+----------------------------------------------
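Nothing in this commit fills these placeholders; a minimal sketch of how they might be formatted before querying (all values illustrative):

```python
# Illustrative only: fill rag.prompt's placeholders with plain str.format.
with open("prompts/rag.prompt") as f:
    template = f.read()

prompt = template.format(
    desired_length="about 200 words",
    complexity_level="executive summary",
    output_format="markdown bullet points",
    section_name="Yield Alerts",
    specific_topic="summarize the most frequent alert causes",
    specific_elements="include affected part numbers and dates",
)
```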
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+llama-index
+llama_hub
+transformers
+accelerate
+openai
+pypdf
+streamlit
+chromadb
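The pages also import `langchain`, `faiss`, `torch`, and `IPython`, and `HuggingFaceEmbedding` relies on `sentence-transformers`; none of these are listed, which is another plausible source of the build error. A hedged sketch of the missing entries:

```text
langchain
faiss-cpu
torch
ipython
sentence-transformers
```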