import os

import streamlit as st
import streamlit.components.v1 as components
import anthropic
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, PropertyGraphIndex
from llama_index.core.indices.property_graph import (
    ImplicitPathExtractor,
    SimpleLLMPathExtractor,
)
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llmlingua import PromptCompressor
from rouge_score import rouge_scorer
from semantic_text_similarity.models import WebBertSimilarity
import nest_asyncio

# Allow nested event loops (LlamaIndex uses asyncio internally)
nest_asyncio.apply()

# OpenAI credentials
# import openai
# key = os.getenv('OPENAI_API_KEY')
# openai.api_key = key
# os.environ["OPENAI_API_KEY"] = key

# Mistral credentials
# key = os.getenv('MISTRAL_API_KEY')
# os.environ["MISTRAL_API_KEY"] = key

# Anthropic credentials
key = os.getenv('CLAUDE_API_KEY')
os.environ["ANTHROPIC_API_KEY"] = key

# Make sure the upload/working directory exists
os.makedirs("./data", exist_ok=True)

# Streamlit UI
st.title("Prompt Optimization for a Policy Bot")

uploaded_files = st.file_uploader("Upload a Policy document in pdf format", type="pdf", accept_multiple_files=True)
if uploaded_files:
    # Persist each upload to disk and collect the parsed documents
    documents = []
    for uploaded_file in uploaded_files:
        with open(f"./data/{uploaded_file.name}", 'wb') as f:
            f.write(uploaded_file.getbuffer())
        reader = SimpleDirectoryReader(input_files=[f"./data/{uploaded_file.name}"])
        documents.extend(reader.load_data())
    st.write(documents)
    st.success("File uploaded...")

    # # Indexing (knowledge-graph variant)
    # index = PropertyGraphIndex.from_documents(
    #     documents,
    #     embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
    #     kg_extractors=[
    #         ImplicitPathExtractor(),
    #         SimpleLLMPathExtractor(
    #             llm=OpenAI(model="gpt-3.5-turbo", temperature=0.3),
    #             num_workers=4,
    #             max_paths_per_chunk=10,
    #         ),
    #     ],
    #     show_progress=True,
    # )

    # # Save Knowledge Graph
    # index.property_graph_store.save_networkx_graph(name="./data/kg.html")

    # # Display the graph in Streamlit
    # st.success("File Processed...")
    # st.success("Creating Knowledge Graph...")
    # HtmlFile = open("./data/kg.html", 'r', encoding='utf-8')
    # source_code = HtmlFile.read()
    # components.html(source_code, height=500, width=700)

    # # Retrieval (knowledge-graph variant)
    # kg_retriever = index.as_retriever(
    #     include_text=True,  # include source text, default True
    # )

    # Indexing: split into 256-token chunks and build a vector index
    # (note: this re-runs on every Streamlit interaction; caching is omitted for simplicity)
    splitter = SentenceSplitter(chunk_size=256)
    nodes = splitter.get_nodes_from_documents(documents)
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)
    index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)

    # Retrieval: keyword (BM25) and dense (vector) retrievers over the same nodes
    bm25_retriever = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=10)
    vector_retriever = index.as_retriever(similarity_top_k=10)

    # Hybrid Retriever: union of BM25 and vector results, de-duplicated by node id
    class HybridRetriever(BaseRetriever):
        def __init__(self, vector_retriever, bm25_retriever):
            self.vector_retriever = vector_retriever
            self.bm25_retriever = bm25_retriever
            super().__init__()

        def _retrieve(self, query, **kwargs):
            bm25_nodes = self.bm25_retriever.retrieve(query, **kwargs)
            vector_nodes = self.vector_retriever.retrieve(query, **kwargs)
            all_nodes = []
            node_ids = set()
            for n in bm25_nodes + vector_nodes:
                if n.node.node_id not in node_ids:
                    all_nodes.append(n)
                    node_ids.add(n.node.node_id)
            return all_nodes

    hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)

    # Generation
    # model = "gpt-3.5-turbo"  # (would require an OpenAI client in res() below)
    model = "claude-3-opus-20240229"

    # def get_context(query):
    #     contexts = kg_retriever.retrieve(query)
    #     context_list = [n.text for n in contexts]
    #     return context_list
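    # Illustrative sanity check for the hybrid retriever (not part of the
    # original flow; the query string below is a placeholder). Uncomment to
    # inspect which chunks a query pulls in:
    # sample_nodes = hybrid_retriever.retrieve("What is the leave policy?")
    # st.write([(n.score, n.node.node_id) for n in sample_nodes])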
    def get_context(query):
        contexts = hybrid_retriever.retrieve(query)
        context_list = [n.get_content() for n in contexts]
        return context_list

    # The active model is a Claude model, so generation goes through the
    # Anthropic SDK; the client reads ANTHROPIC_API_KEY from the environment.
    client = anthropic.Anthropic()

    def res(prompt):
        response = client.messages.create(
            model=model,
            max_tokens=1024,  # required by the Anthropic API; 1024 is an arbitrary cap
            system="You are a helpful assistant who answers from the following context. If the answer can't be found in the context, politely refuse.",
            messages=[{"role": "user", "content": prompt}],
        )
        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        # [prompt tokens, completion tokens, total tokens, answer text]
        return [input_tokens, output_tokens, input_tokens + output_tokens, response.content[0].text]

    # Initialize session state for token summary, evaluation details, and chat messages
    if "token_summary" not in st.session_state:
        st.session_state.token_summary = []
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("Enter your query:"):
        st.success("Fetching info...")

        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Retrieve context for the query
        context_list = get_context(prompt)
        context = " ".join(context_list)

        # Original (uncompressed) prompt response
        full_prompt = "\n\n".join([context, prompt])
        orig_res = res(full_prompt)
        st.session_state.messages.append({"role": "assistant", "content": "Generating Original prompt response..."})
        st.session_state.messages.append({"role": "assistant", "content": orig_res[3]})
        st.success("Generating Original prompt response...")
        with st.chat_message("assistant"):
            st.markdown(orig_res[3])
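        # The disabled block below compresses the retrieved context with
        # LLMLingua-2 before querying the model again, then scores the two
        # answers against each other (ROUGE-L and WebBERT similarity).
        # compress_prompt returns a dict; the fields used here are
        # 'compressed_prompt' (the shortened context), plus 'origin_tokens'
        # and 'compressed_tokens' (token counts before/after compression).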
        # # Compressed Response
        # st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
        # st.success("Generating Optimized prompt response...")
        # llm_lingua = PromptCompressor(
        #     model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
        #     use_llmlingua2=True,
        #     device_map="cpu",
        # )

        # def prompt_compression(context, rate=0.5):
        #     compressed_context = llm_lingua.compress_prompt(
        #         context,
        #         rate=rate,
        #         force_tokens=["!", ".", "?", "\n"],
        #         drop_consecutive=True,
        #     )
        #     return compressed_context

        # compressed_context = prompt_compression(context)
        # full_opt_prompt = "\n\n".join([compressed_context['compressed_prompt'], prompt])
        # compressed_res = res(full_opt_prompt)
        # st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
        # with st.chat_message("assistant"):
        #     st.markdown(compressed_res[3])

        # # Evaluate the compressed response against the original
        # scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        # scores = scorer.score(compressed_res[3], orig_res[3])
        # webert_model = WebBertSimilarity(device='cpu')
        # similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100

        # # Display token summary
        # st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
        # st.success('Token Length Summary...')
        # st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
        # st.write(f"Original Prompt has {orig_res[0]} tokens")
        # st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
        # st.write(f"Optimized Prompt has {compressed_res[0]} tokens")

        # st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
        # st.success("Comparing Original and Optimized Prompt Response...")
        # st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score : {scores['rougeL'].fmeasure * 100}"})
        # st.write(f"Rouge Score : {scores['rougeL'].fmeasure * 100}")
        # st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score : {similarity_score}"})
        # st.write(f"Semantic Text Similarity Score : {similarity_score}")
        # st.write(" ")

        # # Cost savings per 1K tokens (indicative prices)
        # # origin_tokens = compressed_context['origin_tokens']
        # # compressed_tokens = compressed_context['compressed_tokens']
        # origin_tokens = orig_res[0]
        # compressed_tokens = compressed_res[0]
        # gpt_saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
        # claude_saving = (origin_tokens - compressed_tokens) * 0.015 / 1000
        # mistral_saving = (origin_tokens - compressed_tokens) * 0.004 / 1000

        # # st.session_state.messages.append({"role": "assistant", "content": f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT-4, ${mistral_saving:.4f} in Mistral"""})
        # # st.success(f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT-4, ${mistral_saving:.4f} in Mistral""")
        # st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt has saved ${gpt_saving:.4f} in GPT-4."})
        # st.success(f"The optimized prompt has saved ${gpt_saving:.4f} in GPT-4.")

        # st.success("Downloading Optimized Prompt...")
        # st.download_button(label="Download Optimized Prompt",
        #                    data=full_opt_prompt, file_name='optimized_prompt.txt')
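# To try this locally (the filename `app.py` is an assumption, not part of
# this script):
#   export CLAUDE_API_KEY=<your Anthropic key>
#   streamlit run app.py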