import os

import nest_asyncio
import openai
import streamlit as st
import streamlit.components.v1 as components
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    PropertyGraphIndex,
)
from llama_index.core.indices.property_graph import (
    ImplicitPathExtractor,
    SimpleLLMPathExtractor,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import BaseRetriever
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.retrievers.bm25 import BM25Retriever
from llmlingua import PromptCompressor
from rouge_score import rouge_scorer
from semantic_text_similarity.models import WebBertSimilarity

# Apply nest_asyncio so LlamaIndex can run async work inside Streamlit's event loop
nest_asyncio.apply()

# OpenAI credentials
key = os.getenv("OPENAI_API_KEY")
openai.api_key = key
os.environ["OPENAI_API_KEY"] = key

# Streamlit UI
st.title("Prompt Optimization for One-Stop Policy QA Bot")

uploaded_files = st.file_uploader("Upload a PDF file", type="pdf", accept_multiple_files=True)

if uploaded_files:
    # The uploaded PDFs are expected to already exist under ../data/
    documents = []
    for uploaded_file in uploaded_files:
        reader = SimpleDirectoryReader(input_files=[f"../data/{uploaded_file.name}"])
        documents.extend(reader.load_data())
    st.success("File uploaded...")

    # Indexing: build a property graph index with implicit and LLM-based path extractors
    index = PropertyGraphIndex.from_documents(
        documents,
        embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
        kg_extractors=[
            ImplicitPathExtractor(),
            SimpleLLMPathExtractor(
                llm=OpenAI(model="gpt-3.5-turbo", temperature=0.3),
                num_workers=4,
                max_paths_per_chunk=10,
            ),
        ],
        show_progress=True,
    )

    # Save the knowledge graph as an interactive HTML file
    index.property_graph_store.save_networkx_graph(name="../data/kg.html")

    # Display the graph in Streamlit
    st.success("File Processed...")
    st.success("Creating Knowledge Graph...")
    with open("../data/kg.html", "r", encoding="utf-8") as html_file:
        source_code = html_file.read()
    components.html(source_code, height=500, width=700)

    # Retrieval
    kg_retriever = index.as_retriever(
        include_text=True,  # include source text, default True
    )

    # Generation
    model = "gpt-3.5-turbo"

    def get_context(query):
        contexts = kg_retriever.retrieve(query)
        context_list = [n.text for n in contexts]
        return context_list

    def res(prompt):
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant who answers from the following context. "
                        "If the answer can't be found in context, politely refuse."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
        )
        return [
            response.usage.prompt_tokens,
            response.usage.completion_tokens,
            response.usage.total_tokens,
            response.choices[0].message.content,
        ]

    # Initialize session state for token summary, evaluation details, and chat messages
    if "token_summary" not in st.session_state:
        st.session_state.token_summary = []
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("Enter your query:"):
        st.success("Fetching info...")

        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response: retrieve graph context for the query
        context_list = get_context(prompt)
        context = " ".join(context_list)

        # Original (uncompressed) prompt response
        full_prompt = "\n\n".join([context, prompt])
        orig_res = res(full_prompt)
        st.session_state.messages.append({"role": "assistant", "content": "Generating Original prompt response..."})
        st.session_state.messages.append({"role": "assistant", "content": orig_res[3]})
        st.success("Generating Original prompt response...")
        with st.chat_message("assistant"):
            st.markdown(orig_res[3])

        # Compressed (optimized) prompt response
        st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
        st.success("Generating Optimized prompt response...")

        llm_lingua = PromptCompressor(
            model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
            use_llmlingua2=True,
            device_map="mps",  # Apple Silicon; use "cpu" or "cuda" as appropriate
        )

        def prompt_compression(context, rate=0.5):
            compressed_context = llm_lingua.compress_prompt(
                context,
                rate=rate,
                force_tokens=["!", ".", "?", "\n"],
                drop_consecutive=True,
            )
            return compressed_context

        compressed_context = prompt_compression(context)
        full_prompt = "\n\n".join([compressed_context["compressed_prompt"], prompt])
        compressed_res = res(full_prompt)
        st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
        with st.chat_message("assistant"):
            st.markdown(compressed_res[3])

        # Evaluate the optimized response against the original (ROUGE-L and semantic similarity)
        scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
        scores = scorer.score(compressed_res[3], orig_res[3])
        webert_model = WebBertSimilarity(device="cpu")
        similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100

        # Display token summary
        st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
        st.success("Token Length Summary...")
        st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
        st.write(f"Original Prompt has {orig_res[0]} tokens")
        st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
        st.write(f"Optimized Prompt has {compressed_res[0]} tokens")

        st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
        st.success("Comparing Original and Optimized Prompt Response...")
        st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score : {scores['rougeL'].fmeasure * 100}"})
        st.write(f"Rouge Score : {scores['rougeL'].fmeasure * 100}")
        st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score : {similarity_score}"})
        st.write(f"Semantic Text Similarity Score : {similarity_score}")
        st.write(" ")

        # Estimated cost saving, assuming $0.06 per 1K tokens (GPT-4 pricing)
        # origin_tokens = compressed_context['origin_tokens']
        # compressed_tokens = compressed_context['compressed_tokens']
        origin_tokens = orig_res[0]
        compressed_tokens = compressed_res[0]
        saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
        st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt has ${saving:.4f} saved in GPT-4."})
        st.success(f"The optimized prompt has ${saving:.4f} saved in GPT-4.")