import os
import subprocess
from datetime import datetime

import numpy as np
import pandas as pd
import streamlit as st
import torch

# Install transformers on the fly if it is missing.
try:
    from transformers import pipeline
except ImportError:
    subprocess.check_call(["pip", "install", "transformers"])
    from transformers import pipeline

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

PDF_PATH = "Sample HI Policy.pdf"
CSV_PATH = "RAG_Test_Questions.csv"

st.set_page_config(page_title="PolicyGaido - Insurance Q&A", page_icon="📝", layout="wide")
st.title("Insurance Policy Q&A Assistant")
st.markdown("Ask questions about your insurance policy or select from predefined questions.")

# Initialize session state on first run.
if "initialized" not in st.session_state:
    st.session_state.initialized = False
    st.session_state.vector_store = None
    st.session_state.questions = []
    st.session_state.evaluation_history = []

with st.sidebar:
    st.header("Configuration")
    model_option = st.selectbox("Select Language Model", ["BERT-for-QA"])
    device = "cuda" if torch.cuda.is_available() else "cpu"
    st.caption(f"Running on: {device}")

    if st.button("Initialize System"):
        with st.spinner("Initializing Q&A system..."):
            st.session_state.initialized = True

    with st.expander("Performance Metrics"):
        if st.session_state.evaluation_history:
            total_queries = len(st.session_state.evaluation_history)
            avg_confidence = np.mean([e["confidence"] for e in st.session_state.evaluation_history])
            avg_relevance = np.mean([e["relevance"] for e in st.session_state.evaluation_history])
            st.metric("Total Queries", total_queries)
            st.metric("Avg. Confidence", f"{avg_confidence:.2f}%")
            st.metric("Avg. Relevance", f"{avg_relevance:.2f}%")
            if st.button("Clear History"):
                st.session_state.evaluation_history = []
                st.experimental_rerun()


@st.cache_data
def load_questions(csv_path):
    """Load predefined questions from a CSV file, trying several encodings."""
    encodings = ["utf-8", "latin-1", "ISO-8859-1", "cp1252"]
    for encoding in encodings:
        try:
            df = pd.read_csv(csv_path, encoding=encoding)
            # Accept either a "Question" or a "Questions" column.
            if "Question" in df.columns:
                return df["Question"].dropna().tolist()
            elif "Questions" in df.columns:
                return df["Questions"].dropna().tolist()
        except UnicodeDecodeError:
            continue  # Try the next encoding.
        except Exception as e:
            st.error(f"Error loading CSV: {e}")
            return []
    return []


if os.path.exists(CSV_PATH):
    st.session_state.questions = load_questions(CSV_PATH)
    if not st.session_state.questions:
        st.warning(f"No questions found in {CSV_PATH}. Make sure the file has a 'Questions' column.")


@st.cache_resource
def process_pdf(pdf_path):
    """Load the policy PDF, split it into chunks, and index them in FAISS."""
    try:
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = text_splitter.split_documents(documents)
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": device},
        )
        return FAISS.from_documents(chunks, embeddings)
    except Exception as e:
        st.error(f"Error processing PDF: {e}")
        return None


if st.session_state.initialized and not st.session_state.vector_store:
    with st.spinner("Processing policy document..."):
        st.session_state.vector_store = process_pdf(PDF_PATH)


def get_answer(question, model_name):
    """Retrieve relevant chunks and run extractive QA over them.

    Returns (answer, source_docs, confidence, relevance_score).
    """
    if st.session_state.vector_store is None:
        return "System not initialized. Please initialize first.", [], 0.0, 0.0

    retriever = st.session_state.vector_store.as_retriever(search_kwargs={"k": 3})
    docs = retriever.get_relevant_documents(question)
    context = " ".join(doc.page_content for doc in docs)
    if not context:
        return "No relevant information found in the policy document.", [], 0.0, 0.0

    # Use a matching tokenizer for whichever QA model is selected.
    qa_model = (
        "deepset/bert-base-cased-squad2"
        if model_name == "BERT-for-QA"
        else "distilbert-base-cased-distilled-squad"
    )
    qa_pipeline = pipeline(
        "question-answering",
        model=qa_model,
        tokenizer=qa_model,
        device=0 if torch.cuda.is_available() else -1,
    )
    result = qa_pipeline(question=question, context=context)

    # Score how relevant the retrieved context is to the question.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": device},
    )
    question_embedding = embeddings.embed_query(question)
    # Embed only the first 1000 characters of the context to avoid token limits.
    context_embedding = embeddings.embed_query(context[:1000])
    # Cosine similarity between the question and context embeddings.
    similarity = np.dot(question_embedding, context_embedding) / (
        np.linalg.norm(question_embedding) * np.linalg.norm(context_embedding)
    )
    relevance_score = float(similarity * 100)

    return result["answer"], docs, result["score"], relevance_score


def evaluate_answer(answer, docs, confidence, relevance):
    """Heuristically flag possible hallucinations and record evaluation metrics."""
    hallucination_indicators = 0

    # Check whether the answer's words overlap with any retrieved chunk.
    answer_found = False
    answer_words = set(answer.lower().split())
    if answer_words:
        for doc in docs:
            doc_content = doc.page_content.lower()
            overlap_count = sum(1 for word in answer_words if word in doc_content)
            if overlap_count / len(answer_words) > 0.3:
                answer_found = True
                break
    if not answer_found and len(answer_words) > 3:
        hallucination_indicators += 1

    # Hedging language in the answer counts as a second hallucination signal.
    hedging_phrases = ["i think", "probably", "likely", "may", "might",
                       "could be", "possibly", "perhaps"]
    if any(phrase in answer.lower() for phrase in hedging_phrases):
        hallucination_indicators += 1

    hallucination_risk = min(100, hallucination_indicators * 50)
    return {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "confidence": confidence * 100,
        "relevance": relevance,
        "hallucination_risk": hallucination_risk,
    }


col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("Ask a Question")
    question_method = st.radio("Choose question input method:", ["Predefined", "Custom"])
    question = ""
    if question_method == "Predefined":
        if st.session_state.questions:
            question = st.selectbox("Select a question:", st.session_state.questions)
        else:
            st.info("No predefined questions available. Please check your CSV file.")
    elif question_method == "Custom":
        question = st.text_area("Enter your question:")

    if question and st.button("Ask Question"):
        with st.spinner("Generating answer..."):
            answer, docs, confidence, relevance = get_answer(question, model_option)
            evaluation = evaluate_answer(answer, docs, confidence, relevance)
            st.session_state.evaluation_history.append(evaluation)
            st.session_state["last_answer"] = (question, answer, docs, evaluation)

with col2:
    st.subheader("Answer")
    if "last_answer" in st.session_state:
        question, answer, docs, evaluation = st.session_state["last_answer"]
        st.markdown(f"Question: {question}")
        st.markdown(f"Answer: {answer}")

        col_a, col_b, col_c = st.columns(3)
        with col_a:
            st.metric("Confidence", f"{evaluation['confidence']:.1f}%",
                      delta=f"{evaluation['confidence'] - 50:.1f}")
        with col_b:
            st.metric("Relevance", f"{evaluation['relevance']:.1f}%",
                      delta=f"{evaluation['relevance'] - 50:.1f}")
        with col_c:
            # Lower hallucination risk is better, so the delta color is inverted.
            st.metric("Hallucination Risk", f"{evaluation['hallucination_risk']:.1f}%",
                      delta=f"{50 - evaluation['hallucination_risk']:.1f}",
                      delta_color="inverse")

        with st.expander("View Source Information"):
            for i, doc in enumerate(docs):
                st.markdown(f"Source {i + 1}: {doc.page_content[:500]}...")

st.divider()
st.subheader("Evaluation History")
if st.session_state.evaluation_history:
    history_df = pd.DataFrame(st.session_state.evaluation_history)

    # Summary statistics across all answered questions.
    st.subheader("Performance Statistics")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Avg. Confidence", f"{history_df['confidence'].mean():.1f}%")
    with col2:
        st.metric("Avg. Relevance", f"{history_df['relevance'].mean():.1f}%")
    with col3:
        st.metric("Avg. Hallucination Risk", f"{history_df['hallucination_risk'].mean():.1f}%")

    # Full history table.
    st.dataframe(history_df)
else:
    st.info("No evaluation history available yet. Ask some questions to build history.")

st.divider()
st.caption("PolicyGaido Insurance Q&A Assistant | Built with Streamlit, Transformers, and FAISS (By Vasu Johri)")
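
# To run this app locally (assuming the file is saved as app.py; adjust the
# filename to match yours):
#   streamlit run app.py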