johirvasu04 committed on
Commit f426830 · verified · 1 Parent(s): 5165d19

Update app.py

Files changed (1):
1. app.py +15 -20
app.py CHANGED
@@ -17,8 +17,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 
-PDF_PATH = "C:/Users/VASU/Downloads/Sample HI Policy.pdf"
-CSV_PATH = "C:/Users/VASU/Downloads/RAG_Test_Questions.csv"
+PDF_PATH = "Sample HI Policy.pdf"
+CSV_PATH = "RAG_Test_Questions.csv"
 
 st.set_page_config(page_title="PolicyGaido - Insurance Q&A", page_icon="πŸ“", layout="wide")
 st.title("Insurance Policy Q&A Assistant")
@@ -32,7 +32,7 @@ if "initialized" not in st.session_state:
 
 with st.sidebar:
     st.header("Configuration")
-    model_option = st.selectbox("Select Language Model", ["BERT-for-QA", "DistilBERT-for-QA"])
+    model_option = st.selectbox("Select Language Model", ["BERT-for-QA"])
     device = "cuda" if torch.cuda.is_available() else "cpu"
     st.caption(f"Running on: {device}")
 
@@ -110,10 +110,9 @@ def get_answer(question, model_name):
     qa_pipeline = pipeline("question-answering", model="deepset/bert-base-cased-squad2" if model_name == "BERT-for-QA" else "distilbert-base-cased-distilled-squad", tokenizer="deepset/bert-base-cased-squad2", device=0 if torch.cuda.is_available() else -1)
     result = qa_pipeline(question=question, context=context)
 
-    # Calculate semantic similarity between question and context as a relevance proxy
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': device})
     question_embedding = embeddings.embed_query(question)
-    context_embedding = embeddings.embed_query(context[:1000]) # Use first 1000 chars to avoid token limits
+    context_embedding = embeddings.embed_query(context[:1000]) # We are using the first 1000 chars only to avoid token limits
 
     # Compute cosine similarity
     similarity = np.dot(question_embedding, context_embedding) / (np.linalg.norm(question_embedding) * np.linalg.norm(context_embedding))
@@ -122,10 +121,8 @@ def get_answer(question, model_name):
     return result["answer"], docs, result["score"], relevance_score
 
 def evaluate_answer(answer, docs, confidence, relevance):
-    # Count potentially hallucinatory indicators
     hallucination_indicators = 0
 
-    # Check if answer contains content not found in supporting docs
    answer_found = False
    answer_words = set(answer.lower().split())
 
@@ -133,24 +130,22 @@ def evaluate_answer(answer, docs, confidence, relevance):
     for doc in docs:
         doc_content = doc.page_content.lower()
         overlap_count = sum(1 for word in answer_words if word in doc_content)
-        if overlap_count / len(answer_words) > 0.3: # At least 30% of answer words are in document
+        if overlap_count / len(answer_words) > 0.3:
             answer_found = True
             break
 
-    if not answer_found and len(answer_words) > 3: # Only count if the answer is substantive
+    if not answer_found and len(answer_words) > 3:
         hallucination_indicators += 1
-
-    # Check for hedging language that might indicate uncertainty
+
     hedging_phrases = ["i think", "probably", "likely", "may", "might", "could be", "possibly", "perhaps"]
     if any(phrase in answer.lower() for phrase in hedging_phrases):
         hallucination_indicators += 1
 
-    # Return hallucination risk score (0-100)
     hallucination_risk = min(100, hallucination_indicators * 50)
 
     return {
         "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-        "confidence": confidence * 100, # Convert to percentage
+        "confidence": confidence * 100,
         "relevance": relevance,
         "hallucination_risk": hallucination_risk
     }
@@ -179,10 +174,10 @@ with col2:
     st.subheader("Answer")
     if "last_answer" in st.session_state:
         question, answer, docs, evaluation = st.session_state["last_answer"]
-        st.markdown(f"**Question:** {question}")
-        st.markdown(f"**Answer:** {answer}")
+        st.markdown(f"Question: {question}")
+        st.markdown(f"Answer: {answer}")
+
 
-        # Display evaluation metrics
         col_a, col_b, col_c = st.columns(3)
         with col_a:
             st.metric("Confidence", f"{evaluation['confidence']:.1f}%",
@@ -197,16 +192,16 @@ with col2:
 
         with st.expander("View Source Information"):
             for i, doc in enumerate(docs):
-                st.markdown(f"**Source {i+1}:** {doc.page_content[:500]}...")
+                st.markdown(f"Source {i+1}: {doc.page_content[:500]}...")
+
 
-# History and statistics section
 st.divider()
 st.subheader("Evaluation History")
 
 if st.session_state.evaluation_history:
     history_df = pd.DataFrame(st.session_state.evaluation_history)
 
-    # Display summary statistics
+    # Displaying the summary statistics
     st.subheader("Performance Statistics")
     col1, col2, col3 = st.columns(3)
     with col1:
@@ -216,7 +211,7 @@ if st.session_state.evaluation_history:
     with col3:
        st.metric("Avg. Hallucination Risk", f"{history_df['hallucination_risk'].mean():.1f}%")
 
-    # Show history table
+    # Here we are showing the history table
    st.dataframe(history_df)
 else:
    st.info("No evaluation history available yet. Ask some questions to build history.")
 