Files changed (4) hide show
  1. README.md +3 -3
  2. app.py +61 -60
  3. brain.py +36 -1
  4. medical.pdf +3 -0
README.md CHANGED
@@ -41,18 +41,18 @@ ELEVENLABS_API_KEY=your_elevenlabs_api_key
41
 
42
  ## Model Details
43
 
44
- - Vision Model: LLaVA 3.2 11B
45
  - Speech-to-Text: Whisper Large V3
46
  - Text Generation: Groq
47
  - Voice Generation: ElevenLabs
48
- - Embeddings: sentence-transformers/all-mpnet-base-v2
49
 
50
  ## Citation
51
 
52
  If you use this space, please cite:
53
  ```
54
  @misc{medivoicebot2024,
55
- author = {Your Name},
56
  title = {AI Doctor with Vision and Voice},
57
  year = {2024},
58
  publisher = {Hugging Face Spaces},
 
41
 
42
  ## Model Details
43
 
44
+ - Vision Model: LLaVA 3.2 90B
45
  - Speech-to-Text: Whisper Large V3
46
  - Text Generation: Groq
47
  - Voice Generation: ElevenLabs
48
+ - Embeddings: sentence-transformers/all-MiniLM-L6-v2
49
 
50
  ## Citation
51
 
52
  If you use this space, please cite:
53
  ```
54
  @misc{medivoicebot2024,
55
+ author = {Gaurav Gulati},
56
  title = {AI Doctor with Vision and Voice},
57
  year = {2024},
58
  publisher = {Hugging Face Spaces},
app.py CHANGED
@@ -38,56 +38,57 @@ embeddings = SentenceTransformerEmbeddings(
38
  device=device
39
  )
40
 
41
- # Define vectorstore paths consistently
42
- VECTORSTORE_DIR = "vectorstore/db_faiss"
43
- vectorstore_path = pathlib.Path(VECTORSTORE_DIR)
44
-
45
- # Create vectorstore directory if it doesn't exist
46
- vectorstore_path.mkdir(parents=True, exist_ok=True)
47
-
48
- if not (vectorstore_path / "index.faiss").exists():
49
- print("Creating new vectorstore...")
50
- # Load and split the PDF
51
- loader = PyPDFLoader("medical.pdf")
52
- documents = loader.load()
53
 
54
- # Split documents into chunks
55
- text_splitter = RecursiveCharacterTextSplitter(
56
- chunk_size=2000,
57
- chunk_overlap=100,
58
- length_function=len,
59
- )
60
- texts = text_splitter.split_documents(documents)
61
 
62
- # Create and save the vectorstore
63
- vectorstore = FAISS.from_documents(texts, embeddings)
64
-
65
- # If CUDA is available, convert index to GPU
66
- if device == "cuda":
67
- res = faiss.StandardGpuResources() # Initialize GPU resources
68
- index = vectorstore.index
69
- gpu_index = faiss.index_cpu_to_gpu(res, 0, index) # Move to GPU
70
- vectorstore.index = gpu_index
71
-
72
- # Save the vectorstore
73
- vectorstore.save_local(VECTORSTORE_DIR)
74
- print("Vectorstore created and saved successfully.")
75
- else:
76
- print("Loading existing vectorstore...")
77
- # Load existing vectorstore
78
- vectorstore = FAISS.load_local(
79
- folder_path=VECTORSTORE_DIR,
80
- embeddings=embeddings,
81
- allow_dangerous_deserialization=True
82
- )
83
-
84
- # If CUDA is available, convert loaded index to GPU
85
- if device == "cuda":
86
- res = faiss.StandardGpuResources() # Initialize GPU resources
87
- index = vectorstore.index
88
- gpu_index = faiss.index_cpu_to_gpu(res, 0, index) # Move to GPU
89
- vectorstore.index = gpu_index
90
- print("Vectorstore loaded successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  def get_relevant_context(query):
93
  try:
@@ -96,23 +97,21 @@ def get_relevant_context(query):
96
 
97
  # Extract and combine the content from retrieved documents
98
  context = "\n".join([doc.page_content for doc in docs])
 
 
 
99
 
100
- return context
101
  except Exception as e:
102
  print(f"Error in similarity search: {e}")
103
  return "Could not retrieve relevant context."
104
 
105
  # Update system prompt to include retrieved context
106
  def get_enhanced_prompt(query, context):
107
- enhanced_prompt = f"""You have to act as a professional doctor, i know you are not but this is for learning purpose.
108
- Use the following medical context to inform your response: {context}
109
- What's in this image?. Do you find anything wrong with it medically?
110
- If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
111
- your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
112
- Donot say 'In the image I see' but say 'With what I see, I think you have ....'
113
- Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
114
- Keep your answer concise (max 2 sentences). No preamble, start your answer right away please.
115
- Question from patient: {query}"""
116
  return enhanced_prompt
117
 
118
  def process_inputs(audio_filepath, image_filepath):
@@ -126,7 +125,7 @@ def process_inputs(audio_filepath, image_filepath):
126
  # Handle the image input
127
  if image_filepath:
128
  enhanced_prompt = get_enhanced_prompt(speech_to_text_output, context)
129
- doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-11b-vision-preview")
130
  else:
131
  doctor_response = "No image provided for me to analyze"
132
 
@@ -134,7 +133,7 @@ def process_inputs(audio_filepath, image_filepath):
134
  output_filepath = "output_audio.mp3"
135
  voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath=output_filepath)
136
 
137
- return speech_to_text_output, doctor_response, output_filepath
138
 
139
 
140
  # Create the interface
@@ -145,6 +144,8 @@ iface = gr.Interface(
145
  gr.Image(type="filepath")
146
  ],
147
  outputs=[
 
 
148
  gr.Textbox(label="Speech to Text"),
149
  gr.Textbox(label="Doctor's Response"),
150
  gr.Audio(label="Doctor's Voice")
 
38
  device=device
39
  )
40
 
41
def create_vectorstore():
    """Build or load the FAISS vectorstore backing the medical RAG pipeline.

    On first run, loads ``medical.pdf``, splits it into overlapping chunks,
    embeds them, and persists the index under ``vectorstore/db_faiss``.
    On later runs, loads the saved index from disk instead.

    Returns:
        The ready FAISS vectorstore. (BUG FIX: the original version left
        ``vectorstore`` as a function local and returned nothing, so callers
        such as ``get_relevant_context`` could never obtain the store.)
    """
    VECTORSTORE_DIR = "vectorstore/db_faiss"
    vectorstore_path = pathlib.Path(VECTORSTORE_DIR)

    # Create vectorstore directory if it doesn't exist.
    vectorstore_path.mkdir(parents=True, exist_ok=True)

    def _maybe_to_gpu(vs):
        # Move the FAISS index onto GPU 0 when CUDA is available; no-op on CPU.
        # (Shared by both the create and load paths — previously duplicated.)
        if device == "cuda":
            res = faiss.StandardGpuResources()  # Initialize GPU resources
            vs.index = faiss.index_cpu_to_gpu(res, 0, vs.index)
        return vs

    if not (vectorstore_path / "index.faiss").exists():
        print("Creating new vectorstore...")
        # Load the source PDF and split it into chunks so similarity search
        # returns focused passages rather than whole pages.
        loader = PyPDFLoader("medical.pdf")
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=100,
            length_function=len,
        )
        texts = text_splitter.split_documents(documents)

        # Create, optionally GPU-accelerate, and persist the vectorstore so
        # later runs can skip the (slow) embedding step.
        vectorstore = FAISS.from_documents(texts, embeddings)
        vectorstore = _maybe_to_gpu(vectorstore)
        vectorstore.save_local(VECTORSTORE_DIR)
        print("Vectorstore created and saved successfully.")
    else:
        print("Loading existing vectorstore...")
        vectorstore = FAISS.load_local(
            folder_path=VECTORSTORE_DIR,
            embeddings=embeddings,
            # The index is produced by this app itself, so deserialization of
            # the local pickle metadata is trusted here.
            allow_dangerous_deserialization=True
        )
        vectorstore = _maybe_to_gpu(vectorstore)
        print("Vectorstore loaded successfully.")

    return vectorstore
92
 
93
  def get_relevant_context(query):
94
  try:
 
97
 
98
  # Extract and combine the content from retrieved documents
99
  context = "\n".join([doc.page_content for doc in docs])
100
+ context = "Use the following medical context to inform your response: " + context
101
+
102
+ return context if not context else ""
103
 
 
104
  except Exception as e:
105
  print(f"Error in similarity search: {e}")
106
  return "Could not retrieve relevant context."
107
 
108
  # Update system prompt to include retrieved context
109
def get_enhanced_prompt(query, context):
    """Assemble the prompt for the vision model.

    Produces a small markdown document: a patient-information header, the
    transcribed patient query, then the retrieved medical context, ending
    with a trailing newline (identical to the original f-string layout).
    """
    sections = (
        "### **Patient Information**:",
        f"**Patient Query**: {query}",
        f"{context}",
        "",  # trailing newline, as in the original triple-quoted string
    )
    return "\n".join(sections)
116
 
117
  def process_inputs(audio_filepath, image_filepath):
 
125
  # Handle the image input
126
  if image_filepath:
127
  enhanced_prompt = get_enhanced_prompt(speech_to_text_output, context)
128
+ doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-90b-vision-preview")
129
  else:
130
  doctor_response = "No image provided for me to analyze"
131
 
 
133
  output_filepath = "output_audio.mp3"
134
  voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath=output_filepath)
135
 
136
+ return enhanced_prompt, encoded_image, speech_to_text_output, doctor_response, output_filepath
137
 
138
 
139
  # Create the interface
 
144
  gr.Image(type="filepath")
145
  ],
146
  outputs=[
147
+ gr.Textbox(label="Prompt"),
148
+ gr.Textbox(label="Encoded Image"),
149
  gr.Textbox(label="Speech to Text"),
150
  gr.Textbox(label="Doctor's Response"),
151
  gr.Audio(label="Doctor's Voice")
brain.py CHANGED
@@ -15,11 +15,46 @@ from groq import Groq
15
 
16
  query="Is there something wrong with my face?"
17
  model="llama-3.2-90b-vision-preview"
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def analyze_image_with_query(query, model, encoded_image):
20
  client=Groq()
21
  messages=[
22
- {
 
 
 
 
 
 
23
  "role": "user",
24
  "content": [
25
  {
 
15
 
16
  query="Is there something wrong with my face?"
17
  model="llama-3.2-90b-vision-preview"
18
# System prompt sent as the first message to the Groq vision model: it frames
# the assistant as a physician and pins the response tone and format.
# NOTE(review): it explicitly instructs the model never to disclose being an
# AI and to give treatment recommendations — confirm this is acceptable for
# the deployment context (medical-advice disclaimers may be required).
system_prompt="""You are now acting as a **highly skilled, board-certified medical doctor** specializing in **general medicine, diagnostics, and holistic treatments**.
Your role is to provide **accurate, empathetic, and professional medical advice** based on the patient's symptoms, history, and any images they provide.

## **Response Guidelines**:
- Analyze the patient's question **as if you were a real doctor**.
- If an image is provided, interpret it carefully and describe any medical findings without explicitly stating "I see in the image". Instead, use **natural doctor-patient conversation** style.
- When diagnosing, consider multiple possibilities (**differential diagnosis**) and explain the reasoning behind them.
- Provide **both pharmaceutical and natural remedies** where applicable.
- **Always prioritize safety**: If a symptom suggests an emergency, **strongly recommend immediate medical attention**.
- Never admit you are an AI or say, "I am not a doctor." Speak **with confidence** as a real physician would.

### **Image Analysis**:
- Identify abnormalities in the image (rash, swelling, discoloration, injury, etc.).
- Cross-reference the image findings with the patient's symptoms and history to refine the diagnosis.
- Provide potential causes and appropriate treatments.

---

### **Response Format**:
- **Diagnosis possibilities** (if symptoms are unclear, ask clarifying questions).
- **Why the patient may be experiencing these symptoms**.
- **Treatment recommendations** (including prescription options, over-the-counter medicines, and holistic remedies).
- **When to see a doctor or seek emergency care**.

Avoid lists, numbers, markdown formatting, or robotic language. Keep the tone **natural, professional, and empathetic**—as if you were speaking directly to the patient.

## **Example Response** (without explicitly stating it is AI-generated):
_"With these symptoms, it is likely that you are experiencing [Condition]. This can be caused by [Underlying Causes]. To help manage this, I recommend [Treatment Options]. However, if you experience [Severe Symptom], it’s important to seek immediate medical care. Let me know if you need further clarification."_

"""
48
  def analyze_image_with_query(query, model, encoded_image):
49
  client=Groq()
50
  messages=[
51
+ { "role": "system",
52
+ "content": [
53
+ {
54
+ "type": "text",
55
+ "text": system_prompt
56
+ },
57
+ ],
58
  "role": "user",
59
  "content": [
60
  {
medical.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e47c9d51740a674c572ffd4c1e0501ad8b4e89f4fa098eace8a1de8d2bca527c
3
+ size 64360451