import os

import streamlit as st
import streamlit.components.v1 as components
import openai
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader, PropertyGraphIndex
from llama_index.core.indices.property_graph import (
    ImplicitPathExtractor,
    SimpleLLMPathExtractor,
)
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import BaseRetriever
from llama_index.embeddings.openai import OpenAIEmbedding
from llmlingua import PromptCompressor
from rouge_score import rouge_scorer
from semantic_text_similarity.models import WebBertSimilarity
import nest_asyncio
# Apply nest_asyncio
nest_asyncio.apply()
# OpenAI credentials
key = os.getenv('OPENAI_API_KEY')
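# Assumption (not in the original): fail fast with a readable message when the
# key is missing, instead of a TypeError from the os.environ assignment below.
if not key:
    st.error("OPENAI_API_KEY is not set. Add it to the environment and restart.")
    st.stop()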
openai.api_key = key
os.environ["OPENAI_API_KEY"] = key
# Streamlit UI
st.title("Prompt Optimization for One-Stop Policy QA Bot")
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
if uploaded_files:
    os.makedirs("../data", exist_ok=True)
    documents = []
    for uploaded_file in uploaded_files:
        # Persist the in-memory upload so SimpleDirectoryReader can read it from disk
        file_path = os.path.join("../data", uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        documents.extend(SimpleDirectoryReader(input_files=[file_path]).load_data())
    st.success("Files uploaded...")
    # Indexing
    index = PropertyGraphIndex.from_documents(
        documents,
        embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
        kg_extractors=[
            ImplicitPathExtractor(),
            SimpleLLMPathExtractor(
                llm=OpenAI(model="gpt-3.5-turbo", temperature=0.3),
                num_workers=4,
                max_paths_per_chunk=10,
            ),
        ],
        show_progress=True,
    )
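    # Optional sketch (an assumption, not in the original app): persist the index
    # so later runs can reload it instead of re-extracting the graph, e.g.:
    # index.storage_context.persist(persist_dir="../data/index_storage")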
    # Save knowledge graph
    index.property_graph_store.save_networkx_graph(name="../data/kg.html")
    st.success("File Processed...")
    st.success("Creating Knowledge Graph...")
    # Display the graph in Streamlit
    with open("../data/kg.html", "r", encoding="utf-8") as html_file:
        source_code = html_file.read()
    components.html(source_code, height=500, width=700)
    # Retrieval
    kg_retriever = index.as_retriever(
        include_text=True,  # include source text, default True
    )
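    # Optional sketch (an assumption, not used by the app below): BM25Retriever and
    # BaseRetriever are imported above but never exercised; a custom BaseRetriever
    # subclass like this could merge keyword (BM25) hits with graph hits:
    # class HybridRetriever(BaseRetriever):
    #     def __init__(self, graph_retriever, bm25_retriever):
    #         self.graph_retriever = graph_retriever
    #         self.bm25_retriever = bm25_retriever
    #         super().__init__()
    #
    #     def _retrieve(self, query_bundle):
    #         # Combine both result lists, de-duplicating by node id
    #         all_nodes, seen = [], set()
    #         for n in self.graph_retriever.retrieve(query_bundle) + self.bm25_retriever.retrieve(query_bundle):
    #             if n.node.node_id not in seen:
    #                 seen.add(n.node.node_id)
    #                 all_nodes.append(n)
    #         return all_nodes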
    # Generation
    model = "gpt-3.5-turbo"

    def get_context(query):
        # Retrieve graph nodes for the query and return their source texts
        contexts = kg_retriever.retrieve(query)
        return [n.text for n in contexts]
    def res(prompt):
        # Ask the model to answer strictly from the supplied context.
        # Returns [prompt_tokens, completion_tokens, total_tokens, answer_text].
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant who answers from the following context. If the answer can't be found in the context, politely refuse.",
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
        )
        return [
            response.usage.prompt_tokens,
            response.usage.completion_tokens,
            response.usage.total_tokens,
            response.choices[0].message.content,
        ]
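    # Hypothetical usage: tokens_in, tokens_out, tokens_total, answer = res("<context>\n\n<question>")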
    # Initialize session state for token summary, evaluation details, and chat messages
    if "token_summary" not in st.session_state:
        st.session_state.token_summary = []
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # Accept user input
    if prompt := st.chat_input("Enter your query:"):
        st.success("Fetching info...")
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response
        context_list = get_context(prompt)
        context = " ".join(context_list)

        # Original prompt response
        full_prompt = "\n\n".join([context, prompt])
        orig_res = res(full_prompt)
        st.session_state.messages.append({"role": "assistant", "content": "Generating Original prompt response..."})
        st.session_state.messages.append({"role": "assistant", "content": orig_res[3]})
        st.success("Generating Original prompt response...")
        with st.chat_message("assistant"):
            st.markdown(orig_res[3])
        # Compressed response
        st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
        st.success("Generating Optimized prompt response...")

        @st.cache_resource
        def load_compressor():
            # Cache the LLMLingua-2 compressor so the model loads once, not on every message
            return PromptCompressor(
                model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
                use_llmlingua2=True,
                device_map="mps",  # Apple-silicon device; use "cpu" or "cuda" on other hardware
            )

        llm_lingua = load_compressor()

        def prompt_compression(context, rate=0.5):
            # Keep roughly `rate` of the tokens; force punctuation/newlines to survive
            compressed_context = llm_lingua.compress_prompt(
                context,
                rate=rate,
                force_tokens=["!", ".", "?", "\n"],
                drop_consecutive=True,
            )
            return compressed_context

        compressed_context = prompt_compression(context)
        full_prompt = "\n\n".join([compressed_context['compressed_prompt'], prompt])
        compressed_res = res(full_prompt)
        st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
        with st.chat_message("assistant"):
            st.markdown(compressed_res[3])
        # Evaluate the compressed response against the original response
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        scores = scorer.score(compressed_res[3], orig_res[3])
        webert_model = WebBertSimilarity(device='cpu')
        # WebBERT predicts similarity on a 0-5 scale; rescale to a 0-100 percentage
        similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100
        # Display token summary
        st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
        st.success('Token Length Summary...')
        st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
        st.write(f"Original Prompt has {orig_res[0]} tokens")
        st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
        st.write(f"Optimized Prompt has {compressed_res[0]} tokens")
        st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
        st.success("Comparing Original and Optimized Prompt Response...")
        st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score: {scores['rougeL'].fmeasure * 100:.2f}"})
        st.write(f"Rouge Score: {scores['rougeL'].fmeasure * 100:.2f}")
        st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score: {similarity_score:.2f}"})
        st.write(f"Semantic Text Similarity Score: {similarity_score:.2f}")
        st.write(" ")
        # Estimate cost saving, assuming $0.06 per 1K tokens (GPT-4 pricing)
        origin_tokens = orig_res[0]
        compressed_tokens = compressed_res[0]
        saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
        st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt saves ${saving:.4f} on GPT-4."})
        st.success(f"The optimized prompt saves ${saving:.4f} on GPT-4.")