Update app.py #8
by aryanxxvii · opened
README.md CHANGED
@@ -41,18 +41,18 @@ ELEVENLABS_API_KEY=your_elevenlabs_api_key
 
 ## Model Details
 
-- Vision Model: LLaVA 3.2
+- Vision Model: LLaVA 3.2 90B
 - Speech-to-Text: Whisper Large V3
 - Text Generation: Groq
 - Voice Generation: ElevenLabs
-- Embeddings: sentence-transformers/all-
+- Embeddings: sentence-transformers/all-MiniLM-L6-v2
 
 ## Citation
 
 If you use this space, please cite:
 ```
 @misc{medivoicebot2024,
-  author = {
+  author = {Gaurav Gulati},
   title = {AI Doctor with Vision and Voice},
   year = {2024},
   publisher = {Hugging Face Spaces},
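The embeddings entry above is the model behind the `SentenceTransformerEmbeddings(...)` call visible at the top of the app.py hunk below. For orientation, a minimal sketch of how that model is typically constructed in LangChain; the diff itself passes `device=device` directly, so the `model_kwargs` form shown here (from `langchain_community`) is an assumption that may vary by LangChain version:

```python
import torch
from langchain_community.embeddings import SentenceTransformerEmbeddings

# Pick GPU when available, matching the `device` variable used in app.py
device = "cuda" if torch.cuda.is_available() else "cpu"

# all-MiniLM-L6-v2 is the embeddings model listed in the README
embeddings = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": device},
)
```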
app.py CHANGED
@@ -38,56 +38,57 @@ embeddings = SentenceTransformerEmbeddings(
     device=device
 )
 
-
-
-
-
-# Create vectorstore directory if it doesn't exist
-vectorstore_path.mkdir(parents=True, exist_ok=True)
-
-if not (vectorstore_path / "index.faiss").exists():
-    print("Creating new vectorstore...")
-    # Load and split the PDF
-    loader = PyPDFLoader("medical.pdf")
-    documents = loader.load()
-
-    # Split documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=2000,
-        chunk_overlap=100,
-        length_function=len,
-    )
-    texts = text_splitter.split_documents(documents)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def create_vectorstore():
+    # Define vectorstore paths consistently
+    VECTORSTORE_DIR = "vectorstore/db_faiss"
+    vectorstore_path = pathlib.Path(VECTORSTORE_DIR)
+
+    # Create vectorstore directory if it doesn't exist
+    vectorstore_path.mkdir(parents=True, exist_ok=True)
+
+    if not (vectorstore_path / "index.faiss").exists():
+        print("Creating new vectorstore...")
+        # Load and split the PDF
+        loader = PyPDFLoader("medical.pdf")
+        documents = loader.load()
+
+        # Split documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=2000,
+            chunk_overlap=100,
+            length_function=len,
+        )
+        texts = text_splitter.split_documents(documents)
+
+        # Create and save the vectorstore
+        vectorstore = FAISS.from_documents(texts, embeddings)
+
+        # If CUDA is available, convert index to GPU
+        if device == "cuda":
+            res = faiss.StandardGpuResources()  # Initialize GPU resources
+            index = vectorstore.index
+            gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # Move to GPU
+            vectorstore.index = gpu_index
+
+        # Save the vectorstore
+        vectorstore.save_local(VECTORSTORE_DIR)
+        print("Vectorstore created and saved successfully.")
+    else:
+        print("Loading existing vectorstore...")
+        # Load existing vectorstore
+        vectorstore = FAISS.load_local(
+            folder_path=VECTORSTORE_DIR,
+            embeddings=embeddings,
+            allow_dangerous_deserialization=True
+        )
+
+        # If CUDA is available, convert loaded index to GPU
+        if device == "cuda":
+            res = faiss.StandardGpuResources()  # Initialize GPU resources
+            index = vectorstore.index
+            gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # Move to GPU
+            vectorstore.index = gpu_index
+        print("Vectorstore loaded successfully.")
 
 def get_relevant_context(query):
     try:
@@ -96,23 +97,21 @@ def get_relevant_context(query):
 
         # Extract and combine the content from retrieved documents
         context = "\n".join([doc.page_content for doc in docs])
+        context = "Use the following medical context to inform your response: " + context
+
+        return context if context else ""
 
-        return context
     except Exception as e:
         print(f"Error in similarity search: {e}")
         return "Could not retrieve relevant context."
 
 # Update system prompt to include retrieved context
 def get_enhanced_prompt(query, context):
-    enhanced_prompt = f"""
-
-
-
-
-    Donot say 'In the image I see' but say 'With what I see, I think you have ....'
-    Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
-    Keep your answer concise (max 2 sentences). No preamble, start your answer right away please.
-    Question from patient: {query}"""
+    enhanced_prompt = f"""### **Patient Information**:
+    **Patient Query**: {query}
+    {context}
+    """
+
     return enhanced_prompt
 
 def process_inputs(audio_filepath, image_filepath):
@@ -126,7 +125,7 @@ def process_inputs(audio_filepath, image_filepath):
     # Handle the image input
     if image_filepath:
         enhanced_prompt = get_enhanced_prompt(speech_to_text_output, context)
-        doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-
+        doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-90b-vision-preview")
     else:
         doctor_response = "No image provided for me to analyze"
 
@@ -134,7 +133,7 @@ def process_inputs(audio_filepath, image_filepath):
     output_filepath = "output_audio.mp3"
     voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath=output_filepath)
 
-    return speech_to_text_output, doctor_response, output_filepath
+    return enhanced_prompt, encoded_image, speech_to_text_output, doctor_response, output_filepath
 
 
 # Create the interface
@@ -145,6 +144,8 @@ iface = gr.Interface(
         gr.Image(type="filepath")
     ],
    outputs=[
+        gr.Textbox(label="Prompt"),
+        gr.Textbox(label="Encoded Image"),
         gr.Textbox(label="Speech to Text"),
         gr.Textbox(label="Doctor's Response"),
         gr.Audio(label="Doctor's Voice")
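Two gaps worth flagging in this hunk: `create_vectorstore()` builds or loads the FAISS store but neither returns it nor assigns it to a module-level name, so `get_relevant_context` has no store to query; and the new `return` in `process_inputs` references `encoded_image`, which is never bound inside that function. A minimal wiring sketch for the first gap, assuming `create_vectorstore` is amended to end with `return vectorstore` (the `k=3` is an arbitrary choice, not from the PR):

```python
# Assumes create_vectorstore() above is amended to end with `return vectorstore`.
vectorstore = create_vectorstore()

def get_relevant_context(query, k=3):
    try:
        # similarity_search is the standard LangChain FAISS retrieval call
        docs = vectorstore.similarity_search(query, k=k)
        context = "\n".join(doc.page_content for doc in docs)
        if not context:
            return ""
        return "Use the following medical context to inform your response: " + context
    except Exception as e:
        print(f"Error in similarity search: {e}")
        return "Could not retrieve relevant context."
```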
brain.py CHANGED
@@ -15,11 +15,48 @@ from groq import Groq
 
 query="Is there something wrong with my face?"
 model="llama-3.2-90b-vision-preview"
+system_prompt="""You are now acting as a **highly skilled, board-certified medical doctor** specializing in **general medicine, diagnostics, and holistic treatments**.
+Your role is to provide **accurate, empathetic, and professional medical advice** based on the patient's symptoms, history, and any images they provide.
 
+## **Response Guidelines**:
+- Analyze the patient's question **as if you were a real doctor**.
+- If an image is provided, interpret it carefully and describe any medical findings without explicitly stating "I see in the image". Instead, use a **natural doctor-patient conversation** style.
+- When diagnosing, consider multiple possibilities (**differential diagnosis**) and explain the reasoning behind them.
+- Provide **both pharmaceutical and natural remedies** where applicable.
+- **Always prioritize safety**: if a symptom suggests an emergency, **strongly recommend immediate medical attention**.
+- Never admit you are an AI or say, "I am not a doctor." Speak **with confidence**, as a real physician would.
+
+### **Image Analysis**:
+- Identify abnormalities in the image (rash, swelling, discoloration, injury, etc.).
+- Cross-reference the image findings with the patient's symptoms and history to refine the diagnosis.
+- Provide potential causes and appropriate treatments.
+
+---
+
+### **Response Format**:
+- **Diagnosis possibilities** (if symptoms are unclear, ask clarifying questions).
+- **Why the patient may be experiencing these symptoms**.
+- **Treatment recommendations** (including prescription options, over-the-counter medicines, and holistic remedies).
+- **When to see a doctor or seek emergency care**.
+
+Avoid lists, numbers, markdown formatting, or robotic language. Keep the tone **natural, professional, and empathetic**, as if you were speaking directly to the patient.
+
+## **Example Response** (without explicitly stating it is AI-generated):
+_"With these symptoms, it is likely that you are experiencing [Condition]. This can be caused by [Underlying Causes]. To help manage this, I recommend [Treatment Options]. However, if you experience [Severe Symptom], it’s important to seek immediate medical care. Let me know if you need further clarification."_
+
+"""
 def analyze_image_with_query(query, model, encoded_image):
     client=Groq()
     messages=[
-        {
+        { "role": "system",
+          "content": [
+              {
+                  "type": "text",
+                  "text": system_prompt
+              },
+          ],
+        },
+        {
         "role": "user",
         "content": [
             {
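The hunk ends before the body of the user message, so for orientation here is a sketch of how a Groq multimodal chat call is commonly completed, with the image inlined as a base64 data URL. The content shape follows Groq's OpenAI-compatible vision API; note that some vision-preview models reject a separate system message alongside an image, in which case the system text has to be folded into the user turn:

```python
import base64
from groq import Groq

def encode_image(image_filepath):
    # Base64-encode the image so it can be embedded as a data URL
    with open(image_filepath, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def analyze_image_with_query(query, model, encoded_image):
    client = Groq()
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": query},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                },
            ],
        },
    ]
    chat_completion = client.chat.completions.create(messages=messages, model=model)
    return chat_completion.choices[0].message.content
```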
medical.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e47c9d51740a674c572ffd4c1e0501ad8b4e89f4fa098eace8a1de8d2bca527c
+size 64360451
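medical.pdf is committed as a Git LFS pointer (roughly 64 MB once materialized), so `PyPDFLoader("medical.pdf")` will fail confusingly on a clone that checked out only the pointer text. A small hypothetical guard, `ensure_real_pdf`, that could run before building the vectorstore:

```python
from pathlib import Path

def ensure_real_pdf(path: str = "medical.pdf") -> None:
    # A real PDF begins with the magic bytes %PDF; a Git LFS pointer is a
    # short text file beginning with "version https://git-lfs.github.com/...".
    head = Path(path).read_bytes()[:4]
    if head != b"%PDF":
        raise RuntimeError(
            f"{path} looks like a Git LFS pointer, not a PDF; "
            "run `git lfs pull` to fetch the real file."
        )
```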