Spaces:

gauravgulati619
/

MediVox

Running

App Files Community

gauravgulati619 committed 13 days ago

Commit

9fd7b07

verified ·

1 Parent(s): 5883d20

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -12

app.py CHANGED Viewed

@@ -1,20 +1,22 @@
 import os
 import gradio as gr
-import soundfile as sf
 from brain import encode_image, analyze_image_with_query
-from patientvoice import transcribe_with_groq
-from doctorvoice import text_to_speech_with_elevenlabs
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
             What's in this image?. Do you find anything wrong with it medically?
             If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
             your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
             Donot say 'In the image I see' but say 'With what I see, I think you have ....'
             Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
-            Keep your answer concise (max 5 sentences). No preamble, start your answer right away please"""
 def process_inputs(audio_data, image_filepath):
     # Handle audio input from microphone
@@ -38,7 +40,7 @@ def process_inputs(audio_data, image_filepath):
     else:
         speech_to_text_output = "No audio provided"
-    # Process image input with error handling
     if image_filepath:
         try:
             doctor_response = analyze_image_with_query(
@@ -53,17 +55,14 @@ def process_inputs(audio_data, image_filepath):
     # Generate doctor's voice with error handling
     try:
-        voice_of_doctor = text_to_speech_with_elevenlabs(
-            input_text=doctor_response,
-            output_filepath="final.mp3"
-        )
     except Exception as e:
         voice_of_doctor = None
         print(f"Error in text-to-speech: {str(e)}")
     return speech_to_text_output, doctor_response, voice_of_doctor
-# Define Gradio interface
 iface = gr.Interface(
     fn=process_inputs,
     inputs=[
@@ -75,9 +74,8 @@ iface = gr.Interface(
         gr.Textbox(label="Doctor's Response"),
         gr.Audio(label="Doctor's Voice")
     ],
-    title="MediVox: AI Doctor with Vision and Voice",
     css=".gradio-container {text-align: center;}"
 )
-# Launch the interface
 iface.launch()

 import os
 import gradio as gr
+import soundfile as sf  # For audio handling
 from brain import encode_image, analyze_image_with_query
+from patientvoice import record_audio, transcribe_with_groq
+from doctorvoice import text_to_speech_with_gtts
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
             What's in this image?. Do you find anything wrong with it medically?
             If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
             your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
             Donot say 'In the image I see' but say 'With what I see, I think you have ....'
             Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
+            Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
 def process_inputs(audio_data, image_filepath):
     # Handle audio input from microphone
     else:
         speech_to_text_output = "No audio provided"
+    # Handle the image input with error handling
     if image_filepath:
         try:
             doctor_response = analyze_image_with_query(
     # Generate doctor's voice with error handling
     try:
+        voice_of_doctor = text_to_speech_with_gtts(input_text=doctor_response, output_filepath="final.mp3")
     except Exception as e:
         voice_of_doctor = None
         print(f"Error in text-to-speech: {str(e)}")
     return speech_to_text_output, doctor_response, voice_of_doctor
+# Create the interface
 iface = gr.Interface(
     fn=process_inputs,
     inputs=[
         gr.Textbox(label="Doctor's Response"),
         gr.Audio(label="Doctor's Voice")
     ],
+    title="MediVox : AI Doctor with Vision and Voice",
     css=".gradio-container {text-align: center;}"
 )
 iface.launch()