Spaces:
Build error
Build error
llamazookeeper
committed on
Commit
•
4595f47
1
Parent(s):
7afc766
- README.md +2 -0
- data/TeslaTroubleshootingCase1_w009.pptx +0 -0
- data/TeslaTroubleshootingCase2_w048.pptx +0 -0
- data/TeslaTroubleshootingCase3_w027.pptx +0 -0
- data/TeslaTroubleshootingCase4_w218.pptx +0 -0
- data/TeslaTroubleshootingCase4_w218_follow_up.pptx +0 -0
- data/TeslaTroubleshootingCase4_w221.pptx +0 -0
- pages/Tesla_Alerts.py +119 -119
- prompts/main.prompt +1 -1
- requirements.txt +1 -0
README.md
CHANGED
@@ -11,3 +11,5 @@ license: apache-2.0
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
14 |
+
|
15 |
+
This project is used in the Hugging Face Space "teslaalerts".
|
data/TeslaTroubleshootingCase1_w009.pptx
ADDED
Binary file (36.3 kB). View file
|
|
data/TeslaTroubleshootingCase2_w048.pptx
ADDED
Binary file (36.4 kB). View file
|
|
data/TeslaTroubleshootingCase3_w027.pptx
ADDED
Binary file (36.7 kB). View file
|
|
data/TeslaTroubleshootingCase4_w218.pptx
ADDED
Binary file (36.4 kB). View file
|
|
data/TeslaTroubleshootingCase4_w218_follow_up.pptx
ADDED
Binary file (36.2 kB). View file
|
|
data/TeslaTroubleshootingCase4_w221.pptx
ADDED
Binary file (36.4 kB). View file
|
|
pages/Tesla_Alerts.py
CHANGED
@@ -1,153 +1,153 @@
|
|
1 |
-
from langchain.prompts import PromptTemplate
|
2 |
-
from langchain.output_parsers import PydanticOutputParser
|
3 |
-
from langchain.chat_models import ChatOpenAI
|
4 |
-
|
5 |
-
from llama_index import VectorStoreIndex, ServiceContext, StorageContext, download_loader, SimpleDirectoryReader
|
6 |
-
from llama_index.vector_stores import FaissVectorStore
|
7 |
-
from llama_index.tools import QueryEngineTool, ToolMetadata
|
8 |
-
from llama_index.query_engine import SubQuestionQueryEngine
|
9 |
-
from llama_index.embeddings import OpenAIEmbedding
|
10 |
-
from llama_index.schema import Document
|
11 |
-
from llama_index.node_parser import UnstructuredElementNodeParser
|
12 |
-
from llama_index.llms import OpenAI
|
13 |
-
|
14 |
-
import streamlit as st
|
15 |
import os
|
16 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
import time
|
18 |
|
19 |
|
20 |
-
st.set_page_config(page_title="
|
21 |
|
22 |
-
st.title(":card_index_dividers:
|
23 |
st.info("""
|
24 |
-
Begin by uploading the case report in
|
25 |
""")
|
26 |
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
if model_name == "openai":
|
31 |
-
model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo")
|
32 |
-
return model
|
33 |
|
34 |
-
def get_vector_index(docs, vector_store):
|
35 |
-
print(docs)
|
36 |
-
llm = get_model("openai")
|
37 |
-
if vector_store == "faiss":
|
38 |
-
d = 1536
|
39 |
-
faiss_index = faiss.IndexFlatL2(d)
|
40 |
-
vector_store = FaissVectorStore(faiss_index=faiss_index)
|
41 |
-
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
42 |
-
# embed_model = OpenAIEmbedding()
|
43 |
-
# service_context = ServiceContext.from_defaults(embed_model=embed_model)
|
44 |
-
service_context = ServiceContext.from_defaults(llm=llm)
|
45 |
-
index = VectorStoreIndex(docs,
|
46 |
-
service_context=service_context,
|
47 |
-
storage_context=storage_context
|
48 |
-
)
|
49 |
-
elif vector_store == "simple":
|
50 |
-
index = VectorStoreIndex.from_documents(docs)
|
51 |
|
52 |
|
53 |
-
return index
|
54 |
|
|
|
55 |
|
|
|
56 |
|
57 |
-
|
|
|
|
|
58 |
|
59 |
-
|
60 |
-
|
61 |
|
62 |
-
|
63 |
-
template=template,
|
64 |
-
input_variables=['search_string']
|
65 |
-
)
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
return response.response
|
71 |
|
|
|
|
|
72 |
|
73 |
-
def get_query_engine(engine):
|
74 |
-
llm = get_model("openai")
|
75 |
-
service_context = ServiceContext.from_defaults(llm=llm)
|
76 |
|
77 |
-
query_engine_tools = [
|
78 |
-
QueryEngineTool(
|
79 |
-
query_engine=engine,
|
80 |
-
metadata=ToolMetadata(
|
81 |
-
name="Alert Report",
|
82 |
-
description=f"Provides information about the alerts from alerts files uploaded.",
|
83 |
-
),
|
84 |
-
),
|
85 |
-
]
|
86 |
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
)
|
92 |
-
return s_engine
|
93 |
-
|
94 |
-
|
95 |
-
if "process_doc" not in st.session_state:
|
96 |
-
st.session_state.process_doc = False
|
97 |
|
|
|
|
|
|
|
|
|
98 |
|
99 |
-
|
100 |
-
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
101 |
|
|
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
Example pdf reports you can upload here:
|
107 |
-
""")
|
108 |
-
|
109 |
-
if st.sidebar.button("Process Document"):
|
110 |
-
with st.spinner("Processing Document..."):
|
111 |
-
|
112 |
-
data_dir = "./data"
|
113 |
-
if not os.path.exists(data_dir):
|
114 |
-
os.makedirs(data_dir)
|
115 |
-
|
116 |
-
for file in files_uploaded:
|
117 |
-
print(f'file named {file.name}')
|
118 |
-
fname=f'{data_dir}/{file.name}'
|
119 |
-
with open(fname, 'wb') as f:
|
120 |
-
f.write(file.read())
|
121 |
-
|
122 |
-
def fmetadata(dummy: str): return {"file_path": ""}
|
123 |
-
|
124 |
-
PptxReader = download_loader("PptxReader")
|
125 |
-
loader = SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)
|
126 |
-
|
127 |
-
documents = loader.load_data()
|
128 |
-
for doc in documents:
|
129 |
-
doc.metadata["file_path"]=""
|
130 |
-
|
131 |
-
st.session_state.index = get_vector_index(documents, vector_store="faiss")
|
132 |
-
#st.session_state.index = get_vector_index(documents, vector_store="simple")
|
133 |
-
st.session_state.process_doc = True
|
134 |
-
st.toast("Document Processsed!")
|
135 |
|
136 |
-
|
137 |
|
138 |
-
|
139 |
-
search_text = st.text_input("Enter your question")
|
140 |
-
if st.button("Submit"):
|
141 |
-
engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
|
142 |
-
start_time = time.time()
|
143 |
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
#st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
|
148 |
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import sys
|
3 |
+
|
4 |
+
from llama_index import SimpleDirectoryReader, ServiceContext, StorageContext, VectorStoreIndex, download_loader,load_index_from_storage
|
5 |
+
from llama_index.llms import HuggingFaceLLM
|
6 |
+
from llama_index.embeddings import HuggingFaceEmbedding
|
7 |
+
from llama_index.vector_stores import ChromaVectorStore
|
8 |
+
from llama_index.storage.index_store import SimpleIndexStore
|
9 |
+
from llama_index.indices.query.schema import QueryBundle, QueryType
|
10 |
+
import chromadb
|
11 |
+
import streamlit as st
|
12 |
import time
|
13 |
|
14 |
|
15 |
+
st.set_page_config(page_title="Tesla Alert Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
|
16 |
|
17 |
+
st.title(":card_index_dividers: Tesla Alert Analyzer")
|
18 |
st.info("""
|
19 |
+
Begin by uploading the case report in pptx format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
|
20 |
""")
|
21 |
|
22 |
|
23 |
+
if "process_doc" not in st.session_state:
|
24 |
+
st.session_state.process_doc = False
|
|
|
|
|
|
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
|
|
|
28 |
|
29 |
+
def fmetadata(dummy: str): return {"file_path": ""}
|
30 |
|
31 |
+
def load_files(file_dir):
|
32 |
|
33 |
+
PptxReader = download_loader("PptxReader")
|
34 |
+
loader = SimpleDirectoryReader(input_dir=file_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)
|
35 |
+
documents = loader.load_data()
|
36 |
|
37 |
+
for doc in documents:
|
38 |
+
doc.metadata["file_path"]=""
|
39 |
|
40 |
+
return documents
|
|
|
|
|
|
|
41 |
|
42 |
+
system_prompt = "You are a Q&A assistant. "
|
43 |
+
system_prompt += "Your goal is to answer questions as accurately as possible based on the instructions and context provided."
|
44 |
+
system_prompt += "Please say you do not know if you do not find answer."
|
|
|
45 |
|
46 |
+
# This will wrap the default prompts that are internal to llama-index
|
47 |
+
query_wrapper_prompt = "<|USER|>{query_str}<|ASSISTANT|>"
|
48 |
|
|
|
|
|
|
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
+
import torch
|
52 |
+
#torch.set_default_device('cuda')
|
53 |
|
54 |
+
@st.cache_resource
|
55 |
+
def llm_loading():
|
56 |
+
print("before huggingfacellm")
|
57 |
+
llm = HuggingFaceLLM(
|
58 |
+
context_window=8000,
|
59 |
+
max_new_tokens=500,
|
60 |
+
generate_kwargs={"temperature": 0.1, "do_sample": True},
|
61 |
+
system_prompt=system_prompt,
|
62 |
+
query_wrapper_prompt=query_wrapper_prompt,
|
63 |
+
tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
|
64 |
+
model_name="mistralai/Mistral-7B-Instruct-v0.1",
|
65 |
+
device_map="auto",
|
66 |
+
tokenizer_kwargs={"max_length": 8000},
|
67 |
+
model_kwargs={"torch_dtype": torch.float16}
|
68 |
)
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
+
print("after huggingfacellm")
|
71 |
+
embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base")
|
72 |
+
print("after embed_model")
|
73 |
+
return llm,embed_model
|
74 |
|
75 |
+
llm, embed_model = llm_loading()
|
|
|
76 |
|
77 |
+
files_uploaded = st.sidebar.file_uploader("Upload the case report in PPT format", type="pptx", accept_multiple_files=True)
|
78 |
|
79 |
+
st.sidebar.info("""
|
80 |
+
Example pptx reports you can upload here:
|
81 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
+
if st.sidebar.button("Process Document"):
|
84 |
|
85 |
+
with st.spinner("Processing Document..."):
|
|
|
|
|
|
|
|
|
86 |
|
87 |
+
data_dir = "data"
|
88 |
+
if not os.path.exists(data_dir):
|
89 |
+
os.makedirs(data_dir)
|
|
|
90 |
|
91 |
+
for uploaded_file in files_uploaded:
|
92 |
+
print(f'file named {uploaded_file.name}')
|
93 |
+
fname=f'{data_dir}/{uploaded_file.name}'
|
94 |
+
with open(fname, 'wb') as f:
|
95 |
+
f.write(uploaded_file.read())
|
96 |
+
|
97 |
+
documents=load_files(data_dir)
|
98 |
|
99 |
+
collection_name = "tesla_report"
|
100 |
+
chroma_client = chromadb.PersistentClient()
|
101 |
+
chroma_collection = chroma_client.get_or_create_collection(collection_name)
|
102 |
+
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
|
103 |
+
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
104 |
+
service_context = ServiceContext.from_defaults(
|
105 |
+
chunk_size=8000,
|
106 |
+
llm=llm,
|
107 |
+
embed_model=embed_model
|
108 |
+
)
|
109 |
+
index = VectorStoreIndex.from_documents(documents, service_context=service_context, storage_context=storage_context)
|
110 |
+
index.storage_context.persist()
|
111 |
+
|
112 |
+
#chroma_collection.peek()
|
113 |
+
|
114 |
+
#st.session_state.index = index
|
115 |
+
st.session_state.process_doc = True
|
116 |
+
|
117 |
+
st.toast("Document Processsed!")
|
118 |
+
|
119 |
+
#st.session_state.process_doc = True
|
120 |
+
|
121 |
+
def clear_form():
|
122 |
+
st.session_state.query_text = st.session_state["question"]
|
123 |
+
st.session_state["question"] = ""
|
124 |
+
st.session_state["response"] = ""
|
125 |
+
|
126 |
+
@st.cache_resource
|
127 |
+
def reload_index(_llm,_embed_model, col ) :
|
128 |
+
chroma_client = chromadb.PersistentClient()
|
129 |
+
chroma_collection = chroma_client.get_or_create_collection(col)
|
130 |
+
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
|
131 |
+
service_context = ServiceContext.from_defaults(llm=llm,embed_model=embed_model)
|
132 |
+
load_index = VectorStoreIndex.from_vector_store(service_context=service_context,
|
133 |
+
vector_store=vector_store)
|
134 |
+
return load_index
|
135 |
+
|
136 |
+
if st.session_state.process_doc:
|
137 |
+
#alert number looks like APP_wnnn where nnn is a number. Please list out all the alerts uploaded in these files!
|
138 |
+
search_text = st.text_input("Enter your question", key='question' )
|
139 |
+
if st.button(label="Submit", on_click=clear_form):
|
140 |
+
index = reload_index(llm,embed_model,"tesla_report" )
|
141 |
+
query_engine = index.as_query_engine()
|
142 |
+
start_time = time.time()
|
143 |
+
#qry = QueryBundle(search_text)
|
144 |
+
#alert number looks like APP_wnnn where nnn is a number. Please list out all the alerts uploaded in these files!"
|
145 |
+
st.write("Processing....")
|
146 |
+
search_text = st.session_state.query_text
|
147 |
+
print(search_text)
|
148 |
+
response = query_engine.query(search_text)
|
149 |
+
st.write(response.response)
|
150 |
+
#st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
|
151 |
+
|
152 |
+
st.toast("Report Analysis Complete!")
|
153 |
|
prompts/main.prompt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
You are a tesla alert analyzer.
|
2 |
Your job is to provide a detailed analysis of the alert and follow-up
|
3 |
-
|
|
|
1 |
You are a tesla alert analyzer.
|
2 |
Your job is to provide a detailed analysis of the alert and follow-up
|
3 |
+
Question: {search_string}
|
requirements.txt
CHANGED
@@ -5,3 +5,4 @@ transformers
|
|
5 |
accelerate
|
6 |
openai
|
7 |
streamlit
|
|
|
|
5 |
accelerate
|
6 |
openai
|
7 |
streamlit
|
8 |
+
faiss
|