# GaidoInsurance / app.py
import streamlit as st
import pandas as pd
import os
import torch
import numpy as np
from datetime import datetime
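# transformers may not be preinstalled in the runtime; fall back to installing it on the fly.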
try:
from transformers import pipeline
except ImportError:
import subprocess
subprocess.check_call(["pip", "install", "transformers"])
from transformers import pipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
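# Input files expected alongside this script: the sample policy PDF and the predefined question CSV.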
PDF_PATH = "Sample HI Policy.pdf"
CSV_PATH = "RAG_Test_Questions.csv"
st.set_page_config(page_title="PolicyGaido - Insurance Q&A", page_icon="📝", layout="wide")
st.title("Insurance Policy Q&A Assistant")
st.markdown("Ask questions about your insurance policy or select from predefined questions.")
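# One-time session state setup so values persist across Streamlit reruns.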
if "initialized" not in st.session_state:
st.session_state.initialized = False
st.session_state.vector_store = None
st.session_state.questions = []
st.session_state.evaluation_history = []
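# Sidebar: model selection, device info, system initialization, and aggregate performance metrics.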
with st.sidebar:
st.header("Configuration")
model_option = st.selectbox("Select Language Model", ["BERT-for-QA"])
device = "cuda" if torch.cuda.is_available() else "cpu"
st.caption(f"Running on: {device}")
if st.button("Initialize System"):
with st.spinner("Initializing Q&A system..."):
st.session_state.initialized = True
with st.expander("Performance Metrics"):
if st.session_state.evaluation_history:
total_queries = len(st.session_state.evaluation_history)
avg_confidence = np.mean([eval_data['confidence'] for eval_data in st.session_state.evaluation_history])
avg_relevance = np.mean([eval_data['relevance'] for eval_data in st.session_state.evaluation_history])
st.metric("Total Queries", total_queries)
st.metric("Avg. Confidence", f"{avg_confidence:.2f}%")
st.metric("Avg. Relevance", f"{avg_relevance:.2f}%")
if st.button("Clear History"):
st.session_state.evaluation_history = []
st.experimental_rerun()
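# Load the predefined questions from the CSV, trying several encodings; cached so the file is parsed once.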
@st.cache_data
def load_questions(csv_path):
encodings = ["utf-8", "latin-1", "ISO-8859-1", "cp1252"]
for encoding in encodings:
try:
df = pd.read_csv(csv_path, encoding=encoding)
# Check for both "Question" and "Questions" columns
if "Question" in df.columns:
return df["Question"].dropna().tolist()
elif "Questions" in df.columns:
return df["Questions"].dropna().tolist()
except UnicodeDecodeError:
continue
except Exception as e:
st.error(f"Error loading CSV: {e}")
return []
return []
if os.path.exists(CSV_PATH):
    st.session_state.questions = load_questions(CSV_PATH)
    if not st.session_state.questions:
        st.warning(f"No questions found in {CSV_PATH}. Make sure the file has a 'Question' or 'Questions' column.")
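# Build the retrieval index: load the PDF, split it into overlapping chunks, embed them with MiniLM, and index them in FAISS.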
@st.cache_resource
def process_pdf(pdf_path):
try:
loader = PyPDFLoader(pdf_path)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': device})
vector_store = FAISS.from_documents(chunks, embeddings)
return vector_store
except Exception as e:
st.error(f"Error processing PDF: {e}")
return None
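# Build the vector store the first time the user initializes the system.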
if st.session_state.initialized and not st.session_state.vector_store:
with st.spinner("Processing policy document..."):
st.session_state.vector_store = process_pdf(PDF_PATH)
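# Answer a question: retrieve the most relevant chunks, run extractive QA over them, and estimate relevance.
# Returns (answer, source documents, model confidence, relevance score).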
def get_answer(question, model_name):
    if st.session_state.vector_store is None:
        return "System not initialized. Please initialize first.", [], 0.0, 0.0
    # Retrieve the three most relevant chunks from the policy document.
    retriever = st.session_state.vector_store.as_retriever(search_kwargs={"k": 3})
    docs = retriever.get_relevant_documents(question)
    context = " ".join([doc.page_content for doc in docs])
    if not context:
        return "No relevant information found in the policy document.", [], 0.0, 0.0
    # Pick the QA model and use the matching tokenizer.
    qa_model = "deepset/bert-base-cased-squad2" if model_name == "BERT-for-QA" else "distilbert-base-cased-distilled-squad"
    qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_model, device=0 if torch.cuda.is_available() else -1)
    result = qa_pipeline(question=question, context=context)
    # Score relevance as the cosine similarity between question and context embeddings.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': device})
    question_embedding = embeddings.embed_query(question)
    context_embedding = embeddings.embed_query(context[:1000])  # first 1000 chars only, to stay within token limits
    similarity = np.dot(question_embedding, context_embedding) / (np.linalg.norm(question_embedding) * np.linalg.norm(context_embedding))
    relevance_score = float(similarity * 100)
    return result["answer"], docs, result["score"], relevance_score
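# Heuristic evaluation: flag possible hallucination when the answer shares little vocabulary with the
# retrieved sources or contains hedging language, and record the result with a timestamp.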
def evaluate_answer(answer, docs, confidence, relevance):
hallucination_indicators = 0
answer_found = False
answer_words = set(answer.lower().split())
if len(answer_words) > 0:
for doc in docs:
doc_content = doc.page_content.lower()
overlap_count = sum(1 for word in answer_words if word in doc_content)
if overlap_count / len(answer_words) > 0.3:
answer_found = True
break
if not answer_found and len(answer_words) > 3:
hallucination_indicators += 1
hedging_phrases = ["i think", "probably", "likely", "may", "might", "could be", "possibly", "perhaps"]
if any(phrase in answer.lower() for phrase in hedging_phrases):
hallucination_indicators += 1
hallucination_risk = min(100, hallucination_indicators * 50)
return {
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"confidence": confidence * 100,
"relevance": relevance,
"hallucination_risk": hallucination_risk
}
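# Main layout: question input on the left, the latest answer and its evaluation on the right.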
col1, col2 = st.columns([1, 1])
with col1:
st.subheader("Ask a Question")
question_method = st.radio("Choose question input method:", ["Predefined", "Custom"])
question = ""
    if question_method == "Predefined":
        if st.session_state.questions:
            question = st.selectbox("Select a question:", st.session_state.questions)
        else:
            st.info("No predefined questions available. Please check your CSV file.")
    elif question_method == "Custom":
        question = st.text_area("Enter your question:")
if question and st.button("Ask Question"):
with st.spinner("Generating answer..."):
answer, docs, confidence, relevance = get_answer(question, model_option)
evaluation = evaluate_answer(answer, docs, confidence, relevance)
st.session_state.evaluation_history.append(evaluation)
st.session_state["last_answer"] = (question, answer, docs, evaluation)
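# Right column: most recent answer, its evaluation metrics, and the retrieved source passages.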
with col2:
st.subheader("Answer")
if "last_answer" in st.session_state:
question, answer, docs, evaluation = st.session_state["last_answer"]
st.markdown(f"Question: {question}")
st.markdown(f"Answer: {answer}")
        col_a, col_b, col_c = st.columns(3)
        with col_a:
            st.metric("Confidence", f"{evaluation['confidence']:.1f}%",
                      delta=f"{evaluation['confidence'] - 50:.1f}")
        with col_b:
            st.metric("Relevance", f"{evaluation['relevance']:.1f}%",
                      delta=f"{evaluation['relevance'] - 50:.1f}")
        with col_c:
            st.metric("Hallucination Risk", f"{evaluation['hallucination_risk']:.1f}%",
                      delta=f"{50 - evaluation['hallucination_risk']:.1f}",
                      delta_color="inverse")
with st.expander("View Source Information"):
for i, doc in enumerate(docs):
st.markdown(f"Source {i+1}: {doc.page_content[:500]}...")
st.divider()
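# Session-wide evaluation history with aggregate statistics.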
st.subheader("Evaluation History")
if st.session_state.evaluation_history:
history_df = pd.DataFrame(st.session_state.evaluation_history)
    # Summary statistics across the evaluation history
st.subheader("Performance Statistics")
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Avg. Confidence", f"{history_df['confidence'].mean():.1f}%")
with col2:
st.metric("Avg. Relevance", f"{history_df['relevance'].mean():.1f}%")
with col3:
st.metric("Avg. Hallucination Risk", f"{history_df['hallucination_risk'].mean():.1f}%")
    # Full per-question history table
st.dataframe(history_df)
else:
st.info("No evaluation history available yet. Ask some questions to build history.")
st.divider()
st.caption("PolicyGaido Insurance Q&A Assistant | Built with Streamlit, Transformers, and FAISS (By Vasu Johri)")