Update app.py #8
by aryanxxvii · opened
README.md CHANGED
@@ -41,18 +41,18 @@ ELEVENLABS_API_KEY=your_elevenlabs_api_key
 
 ## Model Details
 
-- Vision Model: LLaVA 3.2
+- Vision Model: LLaVA 3.2 90B
 - Speech-to-Text: Whisper Large V3
 - Text Generation: Groq
 - Voice Generation: ElevenLabs
-- Embeddings: sentence-transformers/all-
+- Embeddings: sentence-transformers/all-MiniLM-L6-v2
 
 ## Citation
 
 If you use this space, please cite:
 ```
 @misc{medivoicebot2024,
-  author = {
+  author = {Gaurav Gulati},
   title = {AI Doctor with Vision and Voice},
   year = {2024},
   publisher = {Hugging Face Spaces},
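The embeddings entry above is the model behind the `SentenceTransformerEmbeddings(...)` call visible at the top of the app.py hunk below. For orientation, a minimal sketch of how that model is typically constructed in LangChain; the diff itself passes `device=device` directly, so the `model_kwargs` form shown here (from `langchain_community`) is an assumption that may vary by LangChain version:

```python
import torch
from langchain_community.embeddings import SentenceTransformerEmbeddings

# Pick GPU when available, matching the `device` variable used in app.py
device = "cuda" if torch.cuda.is_available() else "cpu"

# all-MiniLM-L6-v2 is the embeddings model listed in the README
embeddings = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": device},
)
```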
app.py CHANGED
@@ -38,56 +38,57 @@ embeddings = SentenceTransformerEmbeddings(
     device=device
 )
 
-
-
-
-
-# Create vectorstore directory if it doesn't exist
-vectorstore_path.mkdir(parents=True, exist_ok=True)
-
-if not (vectorstore_path / "index.faiss").exists():
-    print("Creating new vectorstore...")
-    # Load and split the PDF
-    loader = PyPDFLoader("medical.pdf")
-    documents = loader.load()
-
-    # Split documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=2000,
-        chunk_overlap=100,
-        length_function=len,
-    )
-    texts = text_splitter.split_documents(documents)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def create_vectorstore():
+    # Define vectorstore paths consistently
+    VECTORSTORE_DIR = "vectorstore/db_faiss"
+    vectorstore_path = pathlib.Path(VECTORSTORE_DIR)
+
+    # Create vectorstore directory if it doesn't exist
+    vectorstore_path.mkdir(parents=True, exist_ok=True)
+
+    if not (vectorstore_path / "index.faiss").exists():
+        print("Creating new vectorstore...")
+        # Load and split the PDF
+        loader = PyPDFLoader("medical.pdf")
+        documents = loader.load()
+
+        # Split documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=2000,
+            chunk_overlap=100,
+            length_function=len,
+        )
+        texts = text_splitter.split_documents(documents)
+
+        # Create and save the vectorstore
+        vectorstore = FAISS.from_documents(texts, embeddings)
+
+        # If CUDA is available, convert index to GPU
+        if device == "cuda":
+            res = faiss.StandardGpuResources()  # Initialize GPU resources
+            index = vectorstore.index
+            gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # Move to GPU
+            vectorstore.index = gpu_index
+
+        # Save the vectorstore
+        vectorstore.save_local(VECTORSTORE_DIR)
+        print("Vectorstore created and saved successfully.")
+    else:
+        print("Loading existing vectorstore...")
+        # Load existing vectorstore
+        vectorstore = FAISS.load_local(
+            folder_path=VECTORSTORE_DIR,
+            embeddings=embeddings,
+            allow_dangerous_deserialization=True
+        )
+
+        # If CUDA is available, convert loaded index to GPU
+        if device == "cuda":
+            res = faiss.StandardGpuResources()  # Initialize GPU resources
+            index = vectorstore.index
+            gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # Move to GPU
+            vectorstore.index = gpu_index
+        print("Vectorstore loaded successfully.")
 
 def get_relevant_context(query):
     try:
@@ -96,23 +97,21 @@ def get_relevant_context(query):
 
         # Extract and combine the content from retrieved documents
         context = "\n".join([doc.page_content for doc in docs])
+        context = "Use the following medical context to inform your response: " + context
+
+        return context if context else ""
 
-        return context
     except Exception as e:
         print(f"Error in similarity search: {e}")
         return "Could not retrieve relevant context."
 
 # Update system prompt to include retrieved context
 def get_enhanced_prompt(query, context):
-    enhanced_prompt = f"""
-
-
-
-
-    Donot say 'In the image I see' but say 'With what I see, I think you have ....'
-    Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
-    Keep your answer concise (max 2 sentences). No preamble, start your answer right away please.
-    Question from patient: {query}"""
+    enhanced_prompt = f"""### **Patient Information**:
+    **Patient Query**: {query}
+    {context}
+    """
+
     return enhanced_prompt
 
 def process_inputs(audio_filepath, image_filepath):
@@ -126,7 +125,7 @@ def process_inputs(audio_filepath, image_filepath):
     # Handle the image input
     if image_filepath:
         enhanced_prompt = get_enhanced_prompt(speech_to_text_output, context)
-        doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-
+        doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-90b-vision-preview")
     else:
         doctor_response = "No image provided for me to analyze"
 
@@ -134,7 +133,7 @@ def process_inputs(audio_filepath, image_filepath):
     output_filepath = "output_audio.mp3"
     voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath=output_filepath)
 
-    return speech_to_text_output, doctor_response, output_filepath
+    return enhanced_prompt, encoded_image, speech_to_text_output, doctor_response, output_filepath
 
 
 # Create the interface
@@ -145,6 +144,8 @@ iface = gr.Interface(
         gr.Image(type="filepath")
     ],
    outputs=[
+        gr.Textbox(label="Prompt"),
+        gr.Textbox(label="Encoded Image"),
         gr.Textbox(label="Speech to Text"),
         gr.Textbox(label="Doctor's Response"),
         gr.Audio(label="Doctor's Voice")
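Two gaps worth flagging in this hunk: `create_vectorstore()` builds or loads the FAISS store but neither returns it nor assigns it to a module-level name, so `get_relevant_context` has no store to query; and the new `return` in `process_inputs` references `encoded_image`, which is never bound inside that function. A minimal wiring sketch for the first gap, assuming `create_vectorstore` is amended to end with `return vectorstore` (the `k=3` is an arbitrary choice, not from the PR):

```python
# Assumes create_vectorstore() above is amended to end with `return vectorstore`.
vectorstore = create_vectorstore()

def get_relevant_context(query, k=3):
    try:
        # similarity_search is the standard LangChain FAISS retrieval call
        docs = vectorstore.similarity_search(query, k=k)
        context = "\n".join(doc.page_content for doc in docs)
        if not context:
            return ""
        return "Use the following medical context to inform your response: " + context
    except Exception as e:
        print(f"Error in similarity search: {e}")
        return "Could not retrieve relevant context."
```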
brain.py CHANGED
@@ -15,11 +15,48 @@ from groq import Groq
 
 query="Is there something wrong with my face?"
 model="llama-3.2-90b-vision-preview"
+system_prompt="""You are now acting as a **highly skilled, board-certified medical doctor** specializing in **general medicine, diagnostics, and holistic treatments**.
+Your role is to provide **accurate, empathetic, and professional medical advice** based on the patient's symptoms, history, and any images they provide.
 
+## **Response Guidelines**:
+- Analyze the patient's question **as if you were a real doctor**.
+- If an image is provided, interpret it carefully and describe any medical findings without explicitly stating "I see in the image". Instead, use a **natural doctor-patient conversation** style.
+- When diagnosing, consider multiple possibilities (**differential diagnosis**) and explain the reasoning behind them.
+- Provide **both pharmaceutical and natural remedies** where applicable.
+- **Always prioritize safety**: if a symptom suggests an emergency, **strongly recommend immediate medical attention**.
+- Never admit you are an AI or say, "I am not a doctor." Speak **with confidence**, as a real physician would.
+
+### **Image Analysis**:
+- Identify abnormalities in the image (rash, swelling, discoloration, injury, etc.).
+- Cross-reference the image findings with the patient's symptoms and history to refine the diagnosis.
+- Provide potential causes and appropriate treatments.
+
+---
+
+### **Response Format**:
+- **Diagnosis possibilities** (if symptoms are unclear, ask clarifying questions).
+- **Why the patient may be experiencing these symptoms**.
+- **Treatment recommendations** (including prescription options, over-the-counter medicines, and holistic remedies).
+- **When to see a doctor or seek emergency care**.
+
+Avoid lists, numbers, markdown formatting, or robotic language. Keep the tone **natural, professional, and empathetic**, as if you were speaking directly to the patient.
+
+## **Example Response** (without explicitly stating it is AI-generated):
+_"With these symptoms, it is likely that you are experiencing [Condition]. This can be caused by [Underlying Causes]. To help manage this, I recommend [Treatment Options]. However, if you experience [Severe Symptom], it’s important to seek immediate medical care. Let me know if you need further clarification."_
+
+"""
 def analyze_image_with_query(query, model, encoded_image):
     client=Groq()
     messages=[
-        {
+        { "role": "system",
+          "content": [
+              {
+                  "type": "text",
+                  "text": system_prompt
+              },
+          ],
+        },
+        {
         "role": "user",
         "content": [
             {
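The hunk ends before the body of the user message, so for orientation here is a sketch of how a Groq multimodal chat call is commonly completed, with the image inlined as a base64 data URL. The content shape follows Groq's OpenAI-compatible vision API; note that some vision-preview models reject a separate system message alongside an image, in which case the system text has to be folded into the user turn:

```python
import base64
from groq import Groq

def encode_image(image_filepath):
    # Base64-encode the image so it can be embedded as a data URL
    with open(image_filepath, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def analyze_image_with_query(query, model, encoded_image):
    client = Groq()
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": query},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                },
            ],
        },
    ]
    chat_completion = client.chat.completions.create(messages=messages, model=model)
    return chat_completion.choices[0].message.content
```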
medical.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e47c9d51740a674c572ffd4c1e0501ad8b4e89f4fa098eace8a1de8d2bca527c
+size 64360451
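medical.pdf is committed as a Git LFS pointer (roughly 64 MB once materialized), so `PyPDFLoader("medical.pdf")` will fail confusingly on a clone that checked out only the pointer text. A small hypothetical guard, `ensure_real_pdf`, that could run before building the vectorstore:

```python
from pathlib import Path

def ensure_real_pdf(path: str = "medical.pdf") -> None:
    # A real PDF begins with the magic bytes %PDF; a Git LFS pointer is a
    # short text file beginning with "version https://git-lfs.github.com/...".
    head = Path(path).read_bytes()[:4]
    if head != b"%PDF":
        raise RuntimeError(
            f"{path} looks like a Git LFS pointer, not a PDF; "
            "run `git lfs pull` to fetch the real file."
        )
```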