Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,22 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
import soundfile as sf
|
|
|
4 |
from brain import encode_image, analyze_image_with_query
|
5 |
-
from patientvoice import transcribe_with_groq
|
6 |
-
from doctorvoice import
|
7 |
from dotenv import load_dotenv
|
8 |
|
9 |
# Load environment variables
|
10 |
load_dotenv()
|
|
|
11 |
system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
|
12 |
What's in this image?. Do you find anything wrong with it medically?
|
13 |
If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
|
14 |
your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
|
15 |
Donot say 'In the image I see' but say 'With what I see, I think you have ....'
|
16 |
Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
|
17 |
-
Keep your answer concise (max
|
18 |
|
19 |
def process_inputs(audio_data, image_filepath):
|
20 |
# Handle audio input from microphone
|
@@ -38,7 +40,7 @@ def process_inputs(audio_data, image_filepath):
|
|
38 |
else:
|
39 |
speech_to_text_output = "No audio provided"
|
40 |
|
41 |
-
#
|
42 |
if image_filepath:
|
43 |
try:
|
44 |
doctor_response = analyze_image_with_query(
|
@@ -53,17 +55,14 @@ def process_inputs(audio_data, image_filepath):
|
|
53 |
|
54 |
# Generate doctor's voice with error handling
|
55 |
try:
|
56 |
-
voice_of_doctor =
|
57 |
-
input_text=doctor_response,
|
58 |
-
output_filepath="final.mp3"
|
59 |
-
)
|
60 |
except Exception as e:
|
61 |
voice_of_doctor = None
|
62 |
print(f"Error in text-to-speech: {str(e)}")
|
63 |
|
64 |
return speech_to_text_output, doctor_response, voice_of_doctor
|
65 |
|
66 |
-
#
|
67 |
iface = gr.Interface(
|
68 |
fn=process_inputs,
|
69 |
inputs=[
|
@@ -75,9 +74,8 @@ iface = gr.Interface(
|
|
75 |
gr.Textbox(label="Doctor's Response"),
|
76 |
gr.Audio(label="Doctor's Voice")
|
77 |
],
|
78 |
-
title="MediVox: AI Doctor with Vision and Voice",
|
79 |
css=".gradio-container {text-align: center;}"
|
80 |
)
|
81 |
|
82 |
-
# Launch the interface
|
83 |
iface.launch()
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
import soundfile as sf # For audio handling
|
4 |
+
|
5 |
from brain import encode_image, analyze_image_with_query
|
6 |
+
from patientvoice import record_audio, transcribe_with_groq
|
7 |
+
from doctorvoice import text_to_speech_with_gtts
|
8 |
from dotenv import load_dotenv
|
9 |
|
10 |
# Load environment variables
|
11 |
load_dotenv()
|
12 |
+
|
13 |
system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
|
14 |
What's in this image?. Do you find anything wrong with it medically?
|
15 |
If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
|
16 |
your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
|
17 |
Donot say 'In the image I see' but say 'With what I see, I think you have ....'
|
18 |
Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
|
19 |
+
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
|
20 |
|
21 |
def process_inputs(audio_data, image_filepath):
|
22 |
# Handle audio input from microphone
|
|
|
40 |
else:
|
41 |
speech_to_text_output = "No audio provided"
|
42 |
|
43 |
+
# Handle the image input with error handling
|
44 |
if image_filepath:
|
45 |
try:
|
46 |
doctor_response = analyze_image_with_query(
|
|
|
55 |
|
56 |
# Generate doctor's voice with error handling
|
57 |
try:
|
58 |
+
voice_of_doctor = text_to_speech_with_gtts(input_text=doctor_response, output_filepath="final.mp3")
|
|
|
|
|
|
|
59 |
except Exception as e:
|
60 |
voice_of_doctor = None
|
61 |
print(f"Error in text-to-speech: {str(e)}")
|
62 |
|
63 |
return speech_to_text_output, doctor_response, voice_of_doctor
|
64 |
|
65 |
+
# Create the interface
|
66 |
iface = gr.Interface(
|
67 |
fn=process_inputs,
|
68 |
inputs=[
|
|
|
74 |
gr.Textbox(label="Doctor's Response"),
|
75 |
gr.Audio(label="Doctor's Voice")
|
76 |
],
|
77 |
+
title="MediVox : AI Doctor with Vision and Voice",
|
78 |
css=".gradio-container {text-align: center;}"
|
79 |
)
|
80 |
|
|
|
81 |
iface.launch()
|