Spaces:
Build error
Build error
import os | |
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext | |
from llama_index import download_loader, SimpleDirectoryReader, StorageContext, load_index_from_storage | |
from llama_index.llms import HuggingFaceLLM | |
from llama_index.embeddings import HuggingFaceEmbedding | |
from IPython.display import Markdown, display | |
import chromadb | |
import streamlit as st | |
import time | |
from pypdf import PdfReader | |
from pathlib import Path | |
import os | |
import torch | |
#torch.set_default_device('cuda') | |
st.set_page_config(page_title="Tesla Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide") | |
st.title(":card_index_dividers: Tesla Case Analyzer") | |
st.info(""" | |
Begin by uploading the case report in pptx format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question. | |
""") | |
if "process_doc" not in st.session_state: | |
st.session_state.process_doc = False | |
llm = HuggingFaceLLM( | |
context_window=8000, | |
max_new_tokens=256, | |
generate_kwargs={"temperature": 0.1, "do_sample": True}, | |
system_prompt=system_prompt, | |
query_wrapper_prompt=query_wrapper_prompt, | |
tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1", | |
model_name="mistralai/Mistral-7B-Instruct-v0.1", | |
device_map="auto", | |
tokenizer_kwargs={"max_length": 8000}, | |
model_kwargs={"torch_dtype": torch.float16} | |
) | |
embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base") | |
service_context = ServiceContext.from_defaults( | |
chunk_size=1024, | |
llm=llm, | |
embed_model=embed_model | |
) | |
files_uploaded = st.sidebar.file_uploader("Upload the case report in pptx format", type="pptx",accept_multiple_files=True) | |
st.sidebar.info(""" | |
Example pdf reports you can upload here: | |
""") | |
if st.sidebar.button("Process Document"): | |
with st.spinner("Processing Document..."): | |
data_dir = './data' | |
if not os.path.exists(data_dir): | |
os.makedirs(data_dir) | |
for pdf in files_uploaded: | |
print(f'file named {pdf.name}') | |
fname=f'{data_dir}/{pdf.name}' | |
with open(fname, 'wb') as f: | |
f.write(pdf.read()) | |
def fmetadata(dummy: str): return {"file_path": ""} | |
PptxReader = download_loader("PptxReader") | |
loader = SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata) | |
documents = loader.load_data() | |
for doc in documents: | |
doc.metadata["file_path"]="" | |
print('stored') | |
st.session_state.process_doc = True | |
st.toast("Document Processsed!") | |
#st.session_state.process_doc = True | |
OPENAI_API_KEY = "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" | |
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY | |
if OPENAI_API_KEY: | |
pdfs = st.sidebar.file_uploader("Upload the case report in PDF format", type="pdf") | |
st.sidebar.info(""" | |
Example pdf reports you can upload here: | |
""") | |
if st.sidebar.button("Process Document"): | |
with st.spinner("Processing Document..."): | |
nodes = process_pdf(pdfs) | |
#st.session_state.index = get_vector_index(nodes, vector_store="faiss") | |
st.session_state.index = get_vector_index(nodes, vector_store="simple") | |
st.session_state.process_doc = True | |
st.toast("Document Processsed!") | |
#st.session_state.process_doc = True | |
if st.session_state.process_doc: | |
search_text = st.text_input("Enter your question") | |
if st.button("Submit"): | |
engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3)) | |
start_time = time.time() | |
with st.status("**Analyzing Report...**"): | |
st.write("Case search result...") | |
response = generate_insight(engine, search_text) | |
st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time)) | |
st.toast("Report Analysis Complete!") | |
if st.session_state.end_time: | |
st.write("Report Analysis Time: ", st.session_state.end_time, "s") | |