jayash391 commited on
Commit
1fc474a
·
verified ·
1 Parent(s): 64bf7bd

Upload 3 files

Browse files
Files changed (3) hide show
  1. env (1) +9 -0
  2. nexus.py +288 -0
  3. requirements (1).txt +15 -0
env (1) ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ VECTARA_INDEX_API_KEY = "zwt_Vo9cpGzm6QVtABcdnzVq6QXLdGIP4YAcvcyEAA"
2
+ VECTARA_QUERY_API_KEY = "zqt_Vo9cpBoyEjUQdcTVo2W5hmMKPueBUroBLoGwNQ"
3
+ VECTARA_API_KEY = "zut_Vo9cpHni2hWF_DPJAXmRFKkWzRTWbi-8JwnSxA"
4
+ VECTARA_CORPUS_ID = "2"
5
+ VECTARA_CUSTOMER_ID = "1452235940"
6
+ TOGETHER_API = "7e6c200b7b36924bc1b4a5973859a20d2efa7180e9b5c977301173a6c099136b"
7
+ GOOGLE_SEARCH_API_KEY = "AIzaSyALmmMjvmrmHGtjjuPLEMy6Bp2qgMQJ3Ck"
8
+ UNSTRUCTURED_API_KEY = "eBqsGxYYIfTdPRH7PEveZGVIH6ZHny"
9
+ PINECONE_API_KEY = "4523c180-39fd-4c48-99e8-88164df85b0a"
nexus.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index.indices.managed.vectara import VectaraIndex
2
+ from dotenv import load_dotenv
3
+ import os
4
+ from docx import Document
5
+ from llama_index.llms.together import TogetherLLM
6
+ from llama_index.core.llms import ChatMessage, MessageRole
7
+ from Bio import Entrez
8
+ import ssl
9
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
10
+ import streamlit as st
11
+ from googleapiclient.discovery import build
12
+ from typing import List, Optional
13
+
14
# Load variables from a local, untracked .env file (if present) into os.environ.
load_dotenv()

# SECURITY: API keys must be supplied through the environment or a local .env
# file. They are intentionally NOT given literal fallbacks here: a secret
# embedded in source is readable by anyone with access to the repository.
# NOTE(review): any key that was previously committed should be treated as
# compromised and rotated.
_REQUIRED_SECRETS = (
    "VECTARA_INDEX_API_KEY",
    "VECTARA_QUERY_API_KEY",
    "VECTARA_API_KEY",
    "TOGETHER_API",
    "GOOGLE_SEARCH_API_KEY",
)
_missing_secrets = [name for name in _REQUIRED_SECRETS if not os.getenv(name)]
if _missing_secrets:
    # Fail fast with an actionable message instead of failing later inside a
    # client library with an opaque authentication error.
    raise RuntimeError(
        "Missing required environment variables: "
        + ", ".join(_missing_secrets)
        + ". Set them in the environment or in a local .env file."
    )

# Non-secret identifiers keep their previous defaults when not configured.
os.environ.setdefault("VECTARA_CORPUS_ID", "2")
os.environ.setdefault("VECTARA_CUSTOMER_ID", "1452235940")

# Initialize the Vectara index
index = VectaraIndex()

endpoint = 'https://api.together.xyz/inference'

# Load the hallucination evaluation model
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally - confirm the repository/revision is trusted.
model_name = "vectara/hallucination_evaluation_model"
model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
33
+
34
def vectara_hallucination_evaluation_model(text):
    """Score *text* with the module-level classification model.

    The text is tokenized with the module-level ``tokenizer`` (truncated to at
    most 512 tokens), passed through ``model``, and the first logit of the
    first row is returned as a Python float.

    NOTE(review): the returned value is a raw logit entry, not a normalized
    probability - confirm its meaning and direction against the model card.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    logits = model(**encoded).logits
    return logits[0][0].item()
39
+
40
def _format_pubmed_article(article) -> str:
    """Render one parsed PubMed record as a three-line Markdown entry.

    The entry has exactly one line each for title, abstract and link (followed
    by a blank line), because callers split the entry on newlines.

    Raises:
        KeyError: if the record lacks the citation, article, title or PMID.
    """
    medline_citation = article['MedlineCitation']
    article_data = medline_citation['Article']

    # Collapse internal whitespace/newlines so each field stays on one line.
    title = " ".join(str(article_data['ArticleTitle']).split())

    # Structured abstracts come as several sections; keep all of them rather
    # than only the first, and tolerate a missing or empty abstract.
    sections = article_data.get('Abstract', {}).get('AbstractText', [])
    if isinstance(sections, str):
        sections = [sections]
    abstract = " ".join(" ".join(str(section) for section in sections).split())

    entry = f"**Title:** {title}\n**Abstract:** {abstract}\n"
    entry += f"**Link:** https://pubmed.ncbi.nlm.nih.gov/{medline_citation['PMID']}\n\n"
    return entry


def search_pubmed(query: str) -> Optional[List[str]]:
    """
    Searches PubMed for a given query and returns a list of formatted results
    (or None if no results are found).

    At most three articles are requested. Each result is a Markdown string
    with ``**Title:**``, ``**Abstract:**`` and ``**Link:**`` lines. Articles
    that cannot be parsed are logged and skipped. Any other failure (network,
    parsing of the response) is logged and reported as None.
    """
    Entrez.email = "vikas.ranaksvt@gmail.com"

    try:
        # TLS certificate verification is deliberately left enabled; the
        # process-wide default HTTPS context is not replaced here.
        handle = Entrez.esearch(db="pubmed", term=query, retmax=3)
        try:
            record = Entrez.read(handle)
        finally:
            handle.close()
        id_list = record["IdList"]

        if not id_list:
            return None

        handle = Entrez.efetch(db="pubmed", id=id_list, retmode="xml")
        try:
            articles = Entrez.read(handle)
        finally:
            handle.close()

        results = []
        for article in articles['PubmedArticle']:
            try:
                results.append(_format_pubmed_article(article))
            except KeyError as e:
                print(f"Error parsing article: {article}, Error: {e}")

        return results

    except Exception as e:
        print(f"Error accessing PubMed: {e}")
        return None
79
+
80
def chat_with_pubmed(article_text, article_link):
    """
    Ask a TogetherLLM chat model to respond to a PubMed article.

    A system message (instructions plus ``article_link``) and a user message
    (``article_text``) are sent in one chat call. Returns the reply as a
    string, a fixed apology when the reply is falsy, or a fixed error message
    when anything raises (the error itself is printed).
    """
    try:
        client = TogetherLLM(model="QWEN/QWEN1.5-14B-CHAT", api_key=os.environ['TOGETHER_API'])
        system_msg = ChatMessage(
            role=MessageRole.SYSTEM,
            content="You are a helpful AI assistant summarizing and answering questions about the following medical research article: " + article_link,
        )
        user_msg = ChatMessage(role=MessageRole.USER, content=article_text)
        reply = client.chat([system_msg, user_msg])
        if reply:
            return str(reply)
        return "I'm sorry, I couldn't generate a summary for this article."
    except Exception as err:
        print(f"Error in chat_with_pubmed: {err}")
        return "An error occurred while generating a summary."
95
+
96
def search_web(query: str, num_results: int = 3) -> Optional[List[str]]:
    """
    Searches the web using the Google Search API and returns a list of formatted results
    (or None if no results are found).

    Args:
        query: Search terms forwarded to the custom search engine.
        num_results: Number of results requested from the API.

    Returns:
        A list of Markdown strings (``**Title:**``, ``**Link:**``,
        ``**Snippet:**``), or None when the response has no "items" key or
        the request fails (the error is printed).
    """
    try:
        service = build("customsearch", "v1", developerKey=os.environ["GOOGLE_SEARCH_API_KEY"])

        # Execute the search request
        res = service.cse().list(q=query, cx="6128965e5bcae442b", num=num_results).execute()

        if "items" not in res:
            return None

        # Use .get() per field: one result lacking e.g. a snippet must not
        # raise KeyError and throw away every other result.
        return [
            f"**Title:** {item.get('title', '')}\n"
            f"**Link:** {item.get('link', '')}\n"
            f"**Snippet:** {item.get('snippet', '')}\n\n"
            for item in res["items"]
        ]

    except Exception as e:
        print(f"Error performing web search: {e}")
        return None
123
+
124
def NEXUS_chatbot(user_input, chat_history=None):
    """
    Processes user input, interacts with various resources, and generates a response.
    Handles potential errors, maintains chat history, and evaluates hallucination risk.

    Args:
        user_input: The user's question; used as the query for every source.
        chat_history: Optional list of ``(user_input, response_text)`` tuples.
            A new list is created when omitted; a supplied list is appended
            to in place.

    Returns:
        ``(response_text, chat_history)`` with the new exchange appended.
    """

    if chat_history is None:
        chat_history = []

    response_parts = []  # Collect responses from different sources

    try:
        # Vectara Search: failure here is reported inline and does not stop
        # the other sources from being queried.
        try:
            response = index.as_query_engine().query(user_input)
            response_parts.append(f"**NEXUS Vectara Knowledge Base Response:**\n{response.response}")
        except Exception as e:
            print(f"Error in Vectara search: {e}")
            response_parts.append("Vectara knowledge base is currently unavailable.")

        # PubMed Search and Chat
        pubmed_results = search_pubmed(user_input)
        if pubmed_results:
            response_parts.append("**PubMed Articles (Chat & Summarize):**")
            for article_text in pubmed_results:
                # Entries are "title / abstract / link" lines. Take the first
                # non-empty line as title and the last as link so an abstract
                # that itself contains newlines cannot shift the fields.
                lines = [line for line in article_text.split("\n") if line.strip()]
                if len(lines) < 3:
                    # Malformed entry: show it as-is instead of failing the
                    # whole response on an unpacking error.
                    response_parts.append(article_text)
                    continue
                title, link = lines[0], lines[-1]
                abstract = " ".join(lines[1:-1])
                chat_summary = chat_with_pubmed(abstract, link)
                response_parts.append(f"{title}\n{chat_summary}\n{link}\n")
        else:
            response_parts.append("No relevant PubMed articles found.")

        # Web Search
        web_results = search_web(user_input)
        if web_results:
            response_parts.append("**Web Search Results:**")
            response_parts.extend(web_results)
        else:
            response_parts.append("No relevant web search results found.")

        # Combine response parts into a single string
        response_text = "\n\n".join(response_parts)

        # Hallucination Evaluation: use the module-level evaluator, which
        # truncates its input to 512 tokens. The combined response is
        # usually longer than that, and an untruncated input can fail inside
        # the model and replace the whole answer with the generic error.
        # NOTE(review): the score is a raw logit compared against 0.9 -
        # confirm the score's scale and direction against the model card.
        hallucination_score = vectara_hallucination_evaluation_model(response_text)
        HIGH_HALLUCINATION_THRESHOLD = 0.9
        if hallucination_score > HIGH_HALLUCINATION_THRESHOLD:
            response_text = "I'm still under development and learning. I cannot confidently answer this question yet."

    except Exception as e:
        print(f"Error in chatbot: {e}")
        response_text = "An error occurred. Please try again later."

    chat_history.append((user_input, response_text))
    return response_text, chat_history
185
+
186
def show_info_popup():
    """Render the collapsible "How to use NEXUS" help section."""
    usage_notes = """
    **NEXUS is an AI-powered chatbot designed to assist with medical information.**
    **Capabilities:**
    * **Answers general medical questions:** NEXUS utilizes a curated medical knowledge base to provide answers to a wide range of health-related inquiries.
    * **Summarizes relevant research articles from PubMed:** The chatbot can retrieve and summarize research articles from the PubMed database, making complex scientific information more accessible.
    * **Provides insights from a curated medical knowledge base:** Beyond simple answers, NEXUS offers additional insights and context from its knowledge base to enhance understanding.
    * **Perform safe web searches related to your query:** The chatbot can perform web searches using the Google Search API, ensuring the safety and relevance of the results.
    **Limitations:**
    * **Not a substitute for professional medical advice:** NEXUS is not intended to replace professional medical diagnosis and treatment. Always consult a qualified healthcare provider for personalized medical advice.
    * **General knowledge and educational purposes:** The information provided by NEXUS is for general knowledge and educational purposes only and may not be exhaustive or specific to individual situations.
    * **Under development:** NEXUS is still under development and may occasionally provide inaccurate or incomplete information. It's important to critically evaluate responses and cross-reference with reliable sources.
    * **Hallucination potential:** While NEXUS employs a hallucination evaluation model to minimize the risk of generating fabricated information, there remains a possibility of encountering inaccurate responses, especially for complex or niche queries.
    **How to use:**
    1. **Type your medical question in the text box.**
    2. **NEXUS will provide a comprehensive response combining information from various sources.** This may include insights from its knowledge base, summaries of relevant research articles, and safe web search results.
    3. **You can continue the conversation by asking follow-up questions or providing additional context.** This helps NEXUS refine its search and offer more tailored information.
    4. **in case the NEXUS doesn't show the output please check your internet connection or rerun the same command**
    5. **user can either chat with the documents or with generate resposne from vectara + pubmed + web search**
    5. **chat with document feature is still under development so it would be better to avoid using it for now**
    """
    # The expander keeps the help text collapsed until the user opens it.
    with st.expander("How to use NEXUS"):
        st.write(usage_notes)
208
+
209
# Make sure the conversation log exists before any UI code reads it.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
212
+
213
+ # Define function to display chat history with highlighted user input and chatbot response
214
def display_chat_history():
    """Render every stored exchange: the user's message in an info box, the reply in a success box."""
    for exchange in st.session_state.chat_history:
        question, answer = exchange
        st.info(f"**You:** {question}")
        st.success(f"**NEXUS:** {answer}")
218
+
219
+ # Define function to clear chat history
220
def clear_chat():
    """Reset the stored conversation to an empty list."""
    st.session_state["chat_history"] = []
222
+
223
def main():
    """Build the Streamlit page: styles, sidebar examples, chat input/output and help section."""
    # Streamlit Page Configuration
    st.set_page_config(page_title="NEXUS Chatbot", layout="wide")

    # Custom Styles
    st.markdown(
        """
        <style>
        .css-18e3th9 {
        padding-top: 2rem;
        padding-right: 1rem;
        padding-bottom: 2rem;
        padding-left: 1rem;
        }
        .stButton>button {
        background-color: #4CAF50;
        color: white;
        }
        body {
        background-color: #F0FDF4;
        color: #333333;
        }
        .stMarkdown h1, .stMarkdown h2, .stMarkdown h3, .stMarkdown h4, .stMarkdown h5, .stMarkdown h6 {
        color: #388E3C;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # Title and Introduction
    st.title("NEXUS Chatbot")
    st.write("Ask your medical questions and get reliable information!")

    # Example Questions (Sidebar)
    example_questions = [
        "What are the symptoms of COVID-19?",
        "How can I manage my diabetes?",
        "What are the potential side effects of ibuprofen?",
        "What lifestyle changes can help prevent heart disease?"
    ]
    st.sidebar.header("Example Questions")
    for question in example_questions:
        st.sidebar.write(question)

    # Output Container (created first so the conversation renders above the input)
    output_container = st.container()

    def _start_new_chat():
        # Runs as the button's callback, i.e. before the script reruns.
        # Clearing the text box together with the history matters: otherwise
        # the previous question is still in the input on the rerun and is
        # immediately re-submitted into the freshly cleared history.
        st.session_state.chat_history = []
        st.session_state["input_placeholder"] = ""

    # User Input and Chat History
    input_container = st.container()
    with input_container:
        user_input = st.text_input("You: ", key="input_placeholder", placeholder="Type your medical question here...")
        st.button("Start New Chat", on_click=_start_new_chat)

    if user_input:
        # Only the updated history is needed; the response text is already
        # stored as its last entry.
        _, st.session_state.chat_history = NEXUS_chatbot(user_input, st.session_state.chat_history)
        with output_container:
            display_chat_history()

    # Information Popup
    show_info_popup()


# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
requirements (1).txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ llama-index
2
+ python-dotenv
3
+ PyPDF2
4
+ python-docx
5
+ sentence-transformers
6
+ biopython
7
+ langchain
8
+ transformers
9
+ streamlit
10
+ google-api-python-client
11
+ langchain-community
12
+ llama-index-embeddings-huggingface
13
+ llama-index-llms-together
14
+ llama-index-indices-managed-vectara
15
+ peft